Vipin's Blog..: Language processing programs

Showing posts with label Language processing programs. Show all posts

Saturday, October 8

LEX program to check the syntax of SCANF statement

v!p!n

%{
/*
 * Checks the syntax of scanf statements found in sample.c.
 *
 * A statement is recognised when it has the shape
 *     scanf("<format>",&id,&id,...);
 * For every recognised statement the action compares the number of '%'
 * characters with the number of ',' characters: each conversion
 * specifier must be paired with exactly one comma-separated argument.
 * Text that does not match the rule is echoed by lex's default rule.
 */
#include <stdio.h>
#include <ctype.h>
int i = 0, j = 0;
%}
a "%d"|"%f"|"%c"|"%s"
id [a-zA-Z][a-zA-Z0-9]*
%%
scanf\((\"({a}*|.*)*\"(,\&{id})*\))\; {
    /* j = (number of '%') - (number of ','). It is reset for every
       statement so that one statement cannot influence the next. */
    j = 0;
    for (i = 0; i < yyleng; i++) {
        if (yytext[i] == '%')
            j++;
        if (yytext[i] == ',')
            j--;
    }
    if (j == 0)
        printf("Correct");
    else
        printf("Incorrect");
}
%%
int main(void)
{
    yyin = fopen("sample.c", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("sample.c");
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}

v!p!n

1 comment

%{
/*
 * Checks the syntax of printf statements found in sample.c.
 *
 * A statement is recognised when it has the shape
 *     printf("<format>",id,id,...);
 * For every recognised statement the action compares the number of '%'
 * characters with the number of ',' characters: each conversion
 * specifier must be paired with exactly one comma-separated argument.
 * Text that does not match the rule is echoed by lex's default rule.
 */
#include <stdio.h>
#include <ctype.h>
int i = 0, j = 0;
%}
a "%d"|"%f"|"%c"|"%s"
id [a-zA-Z][a-zA-Z0-9]*
%%
printf\((\"({a}*|.*)*\"(,{id})*\))\; {
    /* j = (number of '%') - (number of ','). It is reset for every
       statement so that one statement cannot influence the next. */
    j = 0;
    for (i = 0; i < yyleng; i++) {
        if (yytext[i] == '%')
            j++;
        if (yytext[i] == ',')
            j--;
    }
    if (j == 0)
        printf("Correct..!!");
    else
        printf("Incorrect..!!");
}
%%
int main(void)
{
    yyin = fopen("sample.c", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("sample.c");
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}

Explanation:

Well, checking syntax of printf statement is very easy.
The syntax should be "printf" followed by "(" and an opening double quote ("). The format string may contain ordinary text or conversion specifiers such as %d and %f, and it is terminated by a closing double quote.

The arguments that correspond to the %d, %f style specifiers must follow the format string, separated by commas, before the closing ")".

The number of arguments is then checked within rule part ( i.e, no. of % is equal to no. of commas).

For more interesting LEX programs , click here...!!

v!p!n

3 comments

%{
/*
 * Checks whether file11.c contains a for loop of the form
 *     for(id=value;id rop value;id op)
 *     {
 *        ...
 *     }
 * with no blanks inside the header and the opening brace on a later
 * line. Prints "correct" when such a loop is matched and "incorrect"
 * when the whole file was scanned without a match.
 */
#include <stdio.h>
#include <ctype.h>
int c = 1;   /* 1 until a well-formed for loop has been matched */
%}
op "++"|"--"
rop "<"|">"|"<="|">="|"=="|"!="
id [a-zA-Z][a-zA-Z0-9]*
no [0-9]+
pp [\n]
%%
for\(({id}=({no}|{id}))?\;{id}{rop}({id}|{no})\;{id}{op}\){pp}+\{(.*\n)*.*\} {printf("correct");c=0;}
%%
int main(void)
{
    yyin = fopen("file11.c", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("file11.c");
        return 1;
    }
    yylex();
    fclose(yyin);
    if (c == 1)
        printf("incorrect");
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}

v!p!n

46 comments

Hello Friend,

In this post we will discuss how to find FIRST of a grammar using C.

The rules for finding FIRST of a given grammar are:

If X is terminal, FIRST(X) = {X}.
If X → ε is a production, then add ε to FIRST(X).
If X is a non-terminal, and X → Y₁ Y₂ … Y_k is a production, and ε is in all of FIRST(Y₁), …, FIRST(Y_k), then add ε to FIRST(X).
If X is a non-terminal, and X → Y₁ Y₂ … Y_k is a production, then add a to FIRST(X) if for some i, a is in FIRST(Y_i), and ε is in all of FIRST(Y₁), …, FIRST(Y_i-1).

Assumptions:

Each Non terminal character is represented by one Uppercase letter.
Each Terminal character is represented by one lowercase letter.
LHS and RHS of each production will be separated by a "=" symbol.
There will be no Blank space within a production string.
Assumed that Left recursion is avoided in all input productions.

/* Author : Vipin
* mail: vipinnarayantvm@gmail.com

 *Modified on : 23/03/2015

*/

#include<stdio.h>

#include<ctype.h>

void FIRST(char[],char );

void addToResultSet(char[],char);

int numOfProductions;

char productionSet[10][10];

main()

    int i;

    char choice;

    char c;

    char result[20];

    printf("How many number of productions ? :");

    scanf(" %d",&numOfProductions);

    for(i=0;i<numOfProductions;i++)//read production string eg: E=E+T

        printf("Enter productions Number %d : ",i+1);

        scanf(" %s",productionSet[i]);

do

        printf("\n Find the FIRST of  :");

        scanf(" %c",&c);

        FIRST(result,c); //Compute FIRST; Get Answer in 'result' array

        printf("\n FIRST(%c)= { ",c);

        for(i=0;result[i]!='\0';i++)

        printf(" %c ",result[i]);       //Display result

        printf("}\n");

         printf("press 'y' to continue : ");
        scanf(" %c",&choice);

    while(choice=='y'||choice =='Y');

/*

 *Function FIRST:

 *Compute the elements in FIRST(c) and write them

 *in Result Array.

*/

void FIRST(char* Result,char c)

    int i,j,k;

    char subResult[20];

    int foundEpsilon;

    subResult[0]='\0';

    Result[0]='\0';

    //If X is terminal, FIRST(X) = {X}.

    if(!(isupper(c)))

        addToResultSet(Result,c);

               return ;

    //If X is non terminal

    //Read each production

    for(i=0;i<numOfProductions;i++)

//Find production with X as LHS

        if(productionSet[i][0]==c)

//If X → ε is a production, then add ε to FIRST(X).

 if(productionSet[i][2]=='$') addToResultSet(Result,'$');

            //If X is a non-terminal, and X → Y1 Y2 … Yk

            //is a production, then add a to FIRST(X)

            //if for some i, a is in FIRST(Yi),

            //and ε is in all of FIRST(Y1), …, FIRST(Yi-1).

      else

                j=2;

                while(productionSet[i][j]!='\0')

                foundEpsilon=0;

                FIRST(subResult,productionSet[i][j]);

                for(k=0;subResult[k]!='\0';k++)

                    addToResultSet(Result,subResult[k]);

                 for(k=0;subResult[k]!='\0';k++)

                     if(subResult[k]=='$')

                         foundEpsilon=1;

                         break;

                 //No ε found, no need to check next element

                 if(!foundEpsilon)

                     break;

                 j++;

    return ;

/* addToResultSet adds the computed
 *element to result set.
 *This code avoids multiple inclusion of elements
 *
 * Result - NUL-terminated set of symbols; must have room for one more
 *          character plus the terminator.
 * val    - symbol to add; nothing happens if it is already present.
*/
void addToResultSet(char Result[],char val)
{
    int k;

    /* Scan to the terminator, leaving early on a duplicate. */
    for(k=0 ;Result[k]!='\0';k++)
    {
        if(Result[k]==val)
            return;
    }

    /* k now indexes the terminator: append val and re-terminate. */
    Result[k]=val;
    Result[k+1]='\0';
}

Output

LEX Program to identify Keywords and convert it into uppercase

v!p!n

4 comments

Guys, apply simple logic. Detect all keywords (this program deals with a small number of keywords for the sake of simplicity).
On detecting any keyword in C, convert it into uppercase letters using the function 'toupper'. Game over!!

%{
/*
 * Copies num.c to standard output with the listed C keywords
 * converted to upper case. Everything else is echoed unchanged.
 */
#include <stdio.h>
#include <ctype.h>   /* toupper */
int i;
%}
keyword main|int|scanf|printf|if|else
ident [a-zA-Z_][a-zA-Z0-9_]*
%%
{keyword} {
    for (i = 0; i < yyleng; i++)
        printf("%c", toupper((unsigned char)yytext[i]));
}
{ident} {
    /* A longer identifier wins over {keyword} (longest match), so a
       keyword embedded in a name such as "print" or "elif" is left
       untouched. */
    ECHO;
}
%%
int main(void)
{
    yyin = fopen("num.c", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("num.c");
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}
Output

Let num.c contain the following program fragment.

main()
{
int num;
scanf("%d",&num);
if(num%2)printf("Odd");
else printf("Even")
}

The output will be,

MAIN()
{
INT num;
SCANF("%d",&num);
IF(num%2)PRINTF("Odd");
ELSE PRINTF("Even")
}

v!p!n

5 comments

Hi Guys,
This lex program is intended to eliminate all whitespace from an input file. As you might expect, this is not a difficult job at all. We simply detect a single blank ' ', a tab '\t' or a newline '\n' and leave it out of the output. That's all..!!!!

%{
/*
 * Copies myfile.txt to standard output with every blank, tab and
 * newline removed. All other characters are echoed by lex's default
 * rule.
 */
#include <stdio.h>
%}
%%
[ \t\n] {
    /* Discard whitespace. Inside a character class a quote is an
       ordinary character, so the class lists only the three
       whitespace characters; writing ' ' would also delete
       apostrophes. */
}
%%
int main(void)
{
    yyin = fopen("myfile.txt", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("myfile.txt");
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}
____________________________________________________________________
Look @ the example:

Basic datatypes in C are:
int char float double

output will be

BasicdatatypesinCare:intcharfloatdouble

.

v!p!n

8 comments

No Comments :)

This lex program is intended to eliminate comments from a C program. This task is simple since, in a C program, comments are introduced only by '//' or enclosed in '/*...*/'. So our aim is to detect the occurrence of these characters and ignore the comment text that follows.

%{
/*
 * Copies mypgm.c to standard output with C comments removed.
 * Text outside comments is echoed by lex's default rule.
 */
#include <stdio.h>
%}

%%
\/\/.* {
    /* Single-line comment: discard up to, not including, the newline. */
}
"/*"([^*]|\*+[^*/])*\*+"/" {
    /* Block comment: this pattern stops at the FIRST closing marker.
       A greedy pattern built from ".*" would run on to the last
       closing marker in the file and swallow the code in between. */
}
%%

int main(void)
{
    yyin = fopen("mypgm.c", "r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("mypgm.c");
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return 1;
}

\/\/.* ; eliminates single-line comments, i.e., comments of the form ' //comment.. .. .. ;'
It simply looks for a '/' immediately followed by another '/' and ignores the rest of that line.

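\/\*(.*\n)*.*\*\/ ; eliminates multi-line comments, i.e., code fragments lying within /*...*/. Note that '.*' is greedy: if a file contains several such comments, this pattern matches from the first '/*' to the last '*/' and also removes the code in between. A pattern that stops at the first '*/', such as "/*"([^*]|\*+[^*/])*\*+"/", avoids that.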

Have a look on this Example:

Consider mypgm.c

//This is a single comment;
This is not a comment;
/*But..
These are mulptiple comments
*/
Again, It is not a comment

The output will be,

This is not a comment;
Again, It is not a comment

v!p!n

30 comments

Howdy guys,

Lets have a look on how a Lex programs works using a simple example.

This sample program counts the number of lines, words and characters in a text file.

Lex programming is not rocket science. You have to just apply an eight year old kid's logic. Yes, you read right. Only a school kid's logic.

LOGIC

Read each character from the text file :

Is it a capital letter in English? [A-Z] : increment capital letter count by 1.
Is it a small letter in English? [a-z] : increment small letter count by 1
Is it [0-9]? increment digit count by 1.
All other characters (like '!', '@','&') are counted as special characters
How to count the number of lines? we simply count the encounters of '\n' <newline> character.that's all!!
To count the number of words we count white spaces and tab character(of course, newline characters too..)

Alignment of a Lex program is very simple and it makes the logic more vivid.

counter.l

%{
/*
 * counter.l - counts lines, words, small letters, capital letters,
 * digits and special characters in myfile.txt.
 *
 * Words are approximated by counting separators: every blank, tab and
 * newline adds one word. Separators themselves are not included in
 * the character total.
 */
#include<stdio.h>

int lines=0, words=0,s_letters=0,c_letters=0, num=0, spl_char=0,total=0;
%}
%%
\n { lines++; words++;}
[\t ] {
    /* Only tab and blank: a quote inside a character class is an
       ordinary character and must be counted as a special character,
       not as a word separator. */
    words++;
}
[A-Z] c_letters++;
[a-z] s_letters++;
[0-9] num++;
. spl_char++;
%%
int main(void)
{
    yyin= fopen("myfile.txt","r");
    if (yyin == NULL) {
        /* Without this check yylex() would silently read stdin. */
        perror("myfile.txt");
        return 1;
    }

    yylex();
    fclose(yyin);

    total=s_letters+c_letters+num+spl_char;

    printf(" This File contains ...");
    printf("\n\t%d lines", lines);
    printf("\n\t%d words",words);
    printf("\n\t%d small letters", s_letters);
    printf("\n\t%d capital letters",c_letters);
    printf("\n\t%d digits", num);
    printf("\n\t%d special characters",spl_char);
    printf("\n\tIn total %d characters.\n",total);

    return 0;
}

/* Returning 1 tells the scanner there is no further input file. */
int yywrap(void)
{
    return(1);
}

Sample output

Let the 'myfile.txt' contains this.

This is my 1st lex program!!!

Cheers!! It works!!:)

The output will be

This file contains..

2 lines

9 words

30 small letters

3 capital letters

1 digits

9 special characters

In total 43 characters.

C program to implement DFA

v!p!n

16 comments

//updated on 11/03/2015

Hello friends,

This post describes how a Deterministic Finite Automaton (DFA) can be implemented using C. Did you know that associated with every programming language compiler there is a program called a recognizer, which takes a string, say S, as input and answers "YES" if S is a sentence of the language and "NO" otherwise? To accomplish this, we have to teach the recognizer the syntactic structure of the intended language (done with regular expressions). For this purpose, we construct a transition diagram called a "finite automaton". In a Deterministic Finite Automaton, only one transition out of a state is possible on a given input symbol. In a Nondeterministic Finite Automaton, on the other hand, more than one transition may be possible for the same input symbol.

. Some interesting implementations using DFA are:

whether DFA accepts all strings.
whether DFA accepts any string
whether two DFAs recognize the same language
DFA with a minimum number of states for a particular regular language

Illustration

Consider the DFA represented in this transition diagram.

Here is a sample DFA with starting state " 0 ", and accepting state " 1" . "a" and "b" are the input symbols.
This DFA accepts any string made up of 'a's and 'b's whose last symbol is 'a'.

CODE:

#include <stdio.h>

/* Number of rows in Transition_Table. */
#define TOTAL_STATES 2

/* Number of entries in Accepted_states. */
#define FINAL_STATES 1

/* Number of entries in alphabet and columns in Transition_Table. */
#define ALPHABET_CHARCTERS 2

/* Result codes returned by DFA(): symbol is not in the alphabet. */
#define UNKNOWN_SYMBOL_ERR 0

/* Result code: the state reached is not in Accepted_states. */
#define NOT_REACHED_FINAL_STATE 1

/* Result code: the state reached is in Accepted_states. */
#define REACHED_FINAL_STATE 2

/* State names; used as row indices into Transition_Table. */
enum DFA_STATES{q0,q1};

/* Input symbol names; used as column indices into Transition_Table. */
enum input{a,b};

/* States compared against the current state to decide acceptance. */
int Accepted_states[FINAL_STATES]={q1};

/* Input characters; the index of a character is its table column. */
char alphabet[ALPHABET_CHARCTERS]={'a','b'};

/* Next state for each (state, symbol) pair; filled by DefineDFA(). */
int Transition_Table[TOTAL_STATES][ALPHABET_CHARCTERS];

/* State the automaton is currently in; starts at q0. */
int Current_state=q0;

void DefineDFA()

    Transition_Table[q0][a] = q1;

    Transition_Table[q0][b] = q0;

    Transition_Table[q1][a] = q1;

    Transition_Table[q1][b] = q0;

int DFA(char current_symbol)

int i,pos;

    for(pos=0;pos<ALPHABET_CHARCTERS; pos++)

        if(current_symbol==alphabet[pos])

            break;//stops if any character other than a or b

    if(pos==ALPHABET_CHARCTERS)

         return UNKNOWN_SYMBOL_ERR;

    for(i=0;i<FINAL_STATES;i++)

 if((Current_state=Transition_Table[Current_state][pos])

==Accepted_states[i])

            return REACHED_FINAL_STATE;

    return NOT_REACHED_FINAL_STATE;

int main(void)

    char current_symbol;

    int result;

    DefineDFA();    //Fill transition table

    printf("Enter a string with 'a' s and 'b's:\n

                Press Enter Key to stop\n");

    while((current_symbol=getchar())!= '\n')

        if((result= DFA(current_symbol))==UNKNOWN_SYMBOL_ERR)

            break;

    switch (result) {

    case UNKNOWN_SYMBOL_ERR:printf("Unknown Symbol %c",

  current_symbol);

 break;

    case NOT_REACHED_FINAL_STATE:printf("Not accepted"); break;

    case REACHED_FINAL_STATE:printf("Accepted");break;

    default: printf("Unknown Error");

    printf("\n\n\n");

    return 0;

OUTPUT
========

String with only "a"s and "b"s and last symbol is "a". ACCEPTED..!!

String with only "a"s and "b"s but last symbol is not "a". REJECTED..!!

Unknown symbol "y" in input. REJECTED.!!!!

Vipin's Blog..

Know Me

Followers

Labels

C PROGRAMS

ANDROID PROGRAMS

UNIX PROGRAMMING

Qt

Career..!!

Saturday, October 8

LEX program to check the syntax of SCANF statement

LEX program to check the syntax of PRINTF statement

Lex program to check the syntax of FOR loop

FIRST of a Given Grammar using C program

Friday, September 9

LEX Program to identify Keywords and convert it into uppercase

LEX Program to eliminate all whitespaces

LEX Program to delete all comments

LEX Program to count the number of lines, words and letters

Monday, August 1

C program to implement DFA

Total Pageviews

Categories

Previous posts

Popular Posts