Showing posts with label Language processing programs. Show all posts
Showing posts with label Language processing programs. Show all posts

Saturday, October 8

LEX program to check the syntax of SCANF statement

%{
#include<stdio.h>
#include<ctype.h>
int i=0,j=0;
%}
a "%d"|"%f"|"%c"|"%s"
id [a-zA-Z][a-zA-Z0-9]*
%%
scanf\((\"({a}*|.*)*\"(,\&{id})*\))\;

{for(i=0;i<yyleng;i++)
{if(yytext[i]=='%')
j++;
if(yytext[i]==',')
j--;
}

if(j==0)
printf("Correct");
else
printf("Incorrect");
}
%%
main()
{ yyin=fopen("sample.c","r");
yylex();
}
int yywrap()
{

return 1;
}        

LEX program to check the syntax of PRINTF statement



%{
#include<stdio.h>
#include<ctype.h>
int i=0,j=0;
%}
a "%d"|"%f"|"%c"|"%s"
id [a-zA-Z][a-zA-Z0-9]*
%%

printf\((\"({a}*|.*)*\"(,{id})*\))\; {for(i=0;i<yyleng;i++) {
if(yytext[i]=='%')
j++;
if(yytext[i]==',')
j--;
}

if(j==0)
printf("Correct..!!");
else
printf("Incorrect..!!");
}
%%

main()
{

yyin=fopen("sample.c","r");
yylex();
}


int yywrap()
{

return 1;
}      




Explanation:

Well, checking syntax of printf statement is very easy.
The syntax should be "printf" followed by "(" and  . It can include a usual string or %d,%f type expression delimited by another ".

The entities specified by %f typed expression should be specified before ")" separated by commas.

The number of arguments is then checked within rule part ( i.e, no. of % is equal to no. of commas).


For more interesting LEX programs , click here...!!


                            

Lex program to check the syntax of FOR loop



%{
#include<stdio.h>
#include<ctype.h>
int c=1;
%}
op "++"|"--"
rop "<"|">"|"<="|">="|"=="|"!="
id [a-zA-Z][a-zA-Z0-9]*
no [0-9]*
pp [\n]
%%
for\(({id}=({no}|{id}))?\;{id}{rop}({id}|{no})\;{id}{op}\){pp}+\{(.*\n)*.*\} {printf("correct");c=0;}
%%
main()
{ yyin=fopen("file11.c","r");
yylex();
if(c==1)
printf("incorrect");
}
int yywrap()
{
return 1;
}

FIRST of a Given Grammar using C program


Hello Friend,

In this post we will discuss how to find FIRST of a grammar using C.


The rules for finding FIRST of a given grammar is:
  1. If X is terminal, FIRST(X) = {X}.
  2. If X → ε is a production, then add ε to FIRST(X).
  3. If X is a non-terminal, and X → Y1 Y2 … Yk is a production, and ε is in all of FIRST(Y1), …, FIRST(Yk), then add ε to FIRST(X).
  4. If X is a non-terminal, and X → Y1 Y2 … Yk is a production, then add a to FIRST(X) if for some i, a is in FIRST(Yi), and ε is in all of FIRST(Y1), …, FIRST(Yi-1).




  1. Each Non terminal character is represented by one Uppercase letter.
  2. Each Terminal character is represented by one lowercase letter.
  3. LHS and RHS of each production will be separated by a "=" symbol.
  4. There will be no Blank space within a production string. 
  5. Assumed that Left recursion is avoided in all input productions. 



/* Author : Vipin
* mail:   vipinnarayantvm@gmail.com
 *Modified on : 23/03/2105
 */

#include<stdio.h>
#include<ctype.h>

void FIRST(char[],char );
void addToResultSet(char[],char);
int numOfProductions;
char productionSet[10][10];

main()
{
    int i;
    char choice; 
    char c;
    char result[20];
    printf("How many number of productions ? :");
    scanf(" %d",&numOfProductions);

    for(i=0;i<numOfProductions;i++)//read production string eg: E=E+T
    {
        printf("Enter productions Number %d : ",i+1);
        scanf(" %s",productionSet[i]);
    }
    do
    {

        printf("\n Find the FIRST of  :");
        scanf(" %c",&c);
        FIRST(result,c); //Compute FIRST; Get Answer in 'result' array
        printf("\n FIRST(%c)= { ",c);
        for(i=0;result[i]!='\0';i++)
        printf(" %c ",result[i]);       //Display result
        printf("}\n");

         printf("press 'y' to continue : ");
        scanf(" %c",&choice);
    }
    while(choice=='y'||choice =='Y');


}
/*
 *Function FIRST:
 *Compute the elements in FIRST(c) and write them
 *in Result Array.
 */
void FIRST(char* Result,char c)
{
    int i,j,k;
    char subResult[20];
    int foundEpsilon;
    subResult[0]='\0';
    Result[0]='\0';

    //If X is terminal, FIRST(X) = {X}.
    if(!(isupper(c)))
    {

        addToResultSet(Result,c);
               return ;
    }
    //If X is non terminal

    //Read each production
    for(i=0;i<numOfProductions;i++)
    {

//Find production with X as LHS
        if(productionSet[i][0]==c)
        {
//If X  ε is a production, then add ε to FIRST(X).
 if(productionSet[i][2]=='$') addToResultSet(Result,'$');

            //If X is a non-terminal, and X  Y1 Y2  Yk
            //is a production, then add a to FIRST(X)
            //if for some i, a is in FIRST(Yi),
            //and ε is in all of FIRST(Y1), …, FIRST(Yi-1).
      else
            {
                j=2;
                while(productionSet[i][j]!='\0')
                {
                foundEpsilon=0;
                FIRST(subResult,productionSet[i][j]);
                for(k=0;subResult[k]!='\0';k++)
                    addToResultSet(Result,subResult[k]);
                 for(k=0;subResult[k]!='\0';k++)
                     if(subResult[k]=='$')
                     {
                         foundEpsilon=1;
                         break;
                     }
                 //No ε found, no need to check next element
                 if(!foundEpsilon)
                     break;
                 j++;

                }
            }
    }
}

    return ;
}

/* addToResultSet adds the computed
 *element to result set. 
 *This code avoids multiple inclusion of elements
  */
void addToResultSet(char Result[],char val)
{
    int k;

    for(k=0 ;Result[k]!='\0';k++)
        if(Result[k]==val)
            return;
    Result[k]=val;
    Result[k+1]='\0';


}



Output







Friday, September 9

LEX Program to identify Keywords and convert it into uppercase


Guys, apply simple logic. Detect all keywords( This program deal with a small number of keywods for sake of simplicity).
On detecting any keyword in C, convert it into uppercasr letter using the funtion 'toupper' . Game over!!




%{#include<stdio.h>
int i;

%}keyword main|int|scanf|printf|if|else
%%

{keyword} {
 for(i=0;i<yyleng;i++)
 printf("%c",toupper(yytext[i]));
   }

%%

main()
{
yyin=fopen("num.c","r");
yylex();
}


int yywrap()
{
return 1;
}

OutputLet num.c contains following program fragment.

main()
{
int num;
scanf("%d",&num);
if(num%2)printf("Odd");
else printf("Even")
}


The output will be,

MAIN()
{
INT num;
SCANF("%d",&num);
IF(num%2)PRINTF("Odd");
ELSE PRINTF("Even")
}





LEX Program to eliminate all whitespaces

Hi Guys,
This lex program is aimed to eliminate all whitespaces from an input file. As u think, this is not at all a difficult job. We have to simply detect single blank ' ' or tab '\t' or a newline '\n' and delete them in output. That's all..!!!!



%{
#include<stdio.h>
%}

%%
[\n\t ' '] {};
%%
main()
{
yyin=fopen("myfile.txt","r");
yylex();

}
int yywrap()
{
return 1;
}

____________________________________________________________________
Look @ the example:

Basic datatypes in C are:
int   char  float   double

output will be

BasicdatatypesinCare:intcharfloatdouble




.

LEX Program to delete all comments


No Comments  :)


This lex program is intended to eliminate comments from a C program. This task is simple since in a C program, comments will be associated only with '//' or '/*...*/ only. So our aim is to detect the occurence of these characters and ignore subsequent comments.



%{
#include<stdio.h>
%}


%%

\/\/.* ;
\/\*(.*\n)*.*\*\/ ;

%%

main()
{
yyin=fopen("mypgm.c","r");
yylex();
}


int yywrap()
{
return 1;
}





\/\/.* ; eliminates single line coments. i.e., comments of the form ' //comment.. .. .. ;'
It simply look on '/'immediately followed by another '/' and ignore that line.

\/\*(.*\n)*.*\*\/ ; eliminates multiple comments. i.e, code fragment lies within /*...*/


Have  a look on this Example:

Consider mypgm.c

//This is a single comment;
This is not a comment;
/*But..
These are mulptiple comments
*/
Again, It is not a comment


The output will be,

This is not a comment;
Again, It is not a comment

LEX Program to count the number of lines, words and letters


Howdy guys,

Lets have a look on how a Lex programs works using a simple example.

This sample programs counts the number of lines, words and characters in a text file.


 
Lex programming is not rocket science. You have to just apply an eight year old kid's logic. Yes, you read right. Only a school kid's logic.

LOGIC

Read each character from the text file :

  • Is it a capital letter in English? [A-Z] : increment capital letter count by 1.
  • Is it a small letter in English? [a-z] : increment small letter count by 1
  • Is it [0-9]? increment digit count by 1.
  • All other characters (like '!', '@','&') are counted as special characters
  • How to count the number of lines? we simply count the encounters of '\n' <newline> character.that's all!! 
  • To count the number of words we count white spaces and tab character(of course, newline characters too..)

 Alignment of a Lex program is very simple and it makes the logic more vivid.



counter.l
%{
#include<stdio.h>
int lines=0, words=0,s_letters=0,c_letters=0, num=0, spl_char=0,total=0;
%}
%%


 
\n { lines++; words++;}
[\t ' '] words++;
[A-Z] c_letters++;
[a-z] s_letters++;
[0-9] num++;
. spl_char++;
%%





main(void)
{
yyin= fopen("myfile.txt","r");
yylex();
total=s_letters+c_letters+num+spl_char;
printf(" This File contains ...");
printf("\n\t%d lines", lines);
printf("\n\t%d words",words);
printf("\n\t%d small letters", s_letters);
printf("\n\t%d capital letters",c_letters);
printf("\n\t%d digits", num);
printf("\n\t%d special characters",spl_char);
printf("\n\tIn total %d characters.\n",total);
}
 
int yywrap()
{
return(1);
}


Sample output



 Let the 'myfile.txt' contains this.

                         This is my 1st lex program!!!
                         Cheers!! It works!!:)

The output will be


This file contains..
2 lines
9 words
30 small letters
3 capital letters
1 digits
9 special characters
In total 43 characters.



Monday, August 1

C program to implement DFA

//updated on 11/03/2015

Hello friends,


               This post describes how a Deterministic Finte Automata (DFA) can be implemented using C. Do you know, associated with every programming language compiler there is a program named  recognizer that takes a string , say string S, as input and answers "YES" if S is a sentence of the language and  " NO " otherwise..!! To accomplish this , we have to train recognizer about the syntactic structure of intended language ( done with regular expressions). For this purpose, we construct a transition diagram called a "finite automation". In a Deterministic Finite Automation, only one transition out of state is possible on the same input symbol. On the other hand, in NonDeterministic Finite Automata more than one transitions may possible for same input symbol.


  . Some interesting implementations using DFA are:
  • whether DFA accepts all strings.
  • whether DFA accepts any string
  • whether two DFAs recognize the same language
  • DFA with a minimum number of states for a particular regular language



Illustartion


Consider the DFA represented in this transition diagram.



   
                         


Here is a sample DFA with starting state " 0 ", and accepting state " 1" .  "a" and "b" are the input symbols.
This DFA will accept any string containing 'a's and 'b' and last symbol should be 'a'.


CODE:


#include <stdio.h>
#define TOTAL_STATES 2
#define FINAL_STATES 1
#define ALPHABET_CHARCTERS 2
#define UNKNOWN_SYMBOL_ERR 0
#define NOT_REACHED_FINAL_STATE 1
#define REACHED_FINAL_STATE 2
enum DFA_STATES{q0,q1};
enum input{a,b};
int Accepted_states[FINAL_STATES]={q1};
char alphabet[ALPHABET_CHARCTERS]={'a','b'};
int Transition_Table[TOTAL_STATES][ALPHABET_CHARCTERS];
int Current_state=q0;
void DefineDFA()
{
    Transition_Table[q0][a] = q1;
    Transition_Table[q0][b] = q0;
    Transition_Table[q1][a] = q1;
    Transition_Table[q1][b] = q0;
}
int DFA(char current_symbol)
{
int i,pos;
    for(pos=0;pos<ALPHABET_CHARCTERS; pos++)
        if(current_symbol==alphabet[pos])   
            break;//stops if any character other than a or b
    if(pos==ALPHABET_CHARCTERS)
         return UNKNOWN_SYMBOL_ERR;
    for(i=0;i<FINAL_STATES;i++)
 if((Current_state=Transition_Table[Current_state][pos])
==Accepted_states[i])
            return REACHED_FINAL_STATE;
    return NOT_REACHED_FINAL_STATE;
}
int main(void)
{
    char current_symbol;
    int result;
 
    DefineDFA();    //Fill transition table
 
    printf("Enter a string with 'a' s and 'b's:\n
                Press Enter Key to stop\n");
 
 
    while((current_symbol=getchar())!= '\n')
        if((result= DFA(current_symbol))==UNKNOWN_SYMBOL_ERR)
            break;
    switch (result) {
    case UNKNOWN_SYMBOL_ERR:printf("Unknown Symbol %c",
  current_symbol); 
 break;
    case NOT_REACHED_FINAL_STATE:printf("Not accepted"); break;
    case REACHED_FINAL_STATE:printf("Accepted");break;
    default: printf("Unknown Error");
    }
    printf("\n\n\n");
    return 0;
}


OUTPUT
========

String with only "a"s and "b"s and last symbol is "a".     ACCEPTED..!!



String with only "a"s and "b"s but  last symbol is not  "a".     REJECTED..!!



Unknown symbol "y" in input. REJECTED.!!!!