Here I'm posting a complete C program that implements a two-pass assembler. A two-pass assembler is a program that accepts an assembly language program as input and produces its machine language equivalent, along with information for the loader. Assembling a source program into object code requires the following functions:
- Convert mnemonic opcodes to their machine language equivalents (e.g., LDA to 00).
- Convert symbolic operands to their equivalent machine addresses (e.g., LOOP to 2045).
- Allocate necessary memory.
- Convert data constants to internal machine equivalents.
- Write the object program and assembly listing.
A two-pass assembler performs these functions in two passes, i.e., the input program is scanned/processed twice.
PASS 1
- Assign addresses to all statements in the program.
- Addresses of symbolic labels are stored.
- Some assembler directives are processed.
PASS 2
- Translate opcodes and symbolic operands.
- Generate data values defined by BYTE,WORD etc.
- Assembler directives are processed.
- Write the object program and assembly listing.
ASSEMBLER.C
/*
PROGRAM NAME: ASSEMBLER.C
AIM: TO IMPLEMENT A TWO-PASS ASSEMBLER
INPUT: "input.txt"
OUTPUT: "inter.txt" , "output.txt"
DATE: 08/09/2011
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Forward declarations for the helpers that are called before they are
   defined. Writing (void) makes these real prototypes, so a call that
   passes arguments by mistake is diagnosed by the compiler. */
void chk_label(void);
void chk_opcode(void);
void READ_LINE(void);
/* Opcode table: pairs each supported mnemonic with the object-code string
   that pass 2 copies into the text record for it. */
struct optab
{
    char code[10];    /* mnemonic as written in the source, e.g. "LDA" */
    char objcode[10]; /* object-code text for that mnemonic */
} myoptab[3] = {
    { "LDA", "00" },
    { "JMP", "01" },
    { "STA", "02" }
};
/* Symbol table: one entry per label seen in pass 1. */
struct symtab
{
    char symbol[10]; /* label text */
    int addr;        /* location counter value stored by chk_label(),
                        or -1 once the label has been seen twice */
} mysymtab[10];
/* State shared by both passes. */
int startaddr;        /* operand of the START line, or 0 when there is none */
int locctr;           /* location counter advanced by chk_opcode() */
int symcount = 0;     /* number of entries in use in mysymtab */
int length;           /* set by main() to locctr - startaddr */
char line[20];        /* raw source line most recently read with fgets() */
char label[8];        /* fields of the current line, filled by READ_LINE() */
char opcode[8];
char operand[8];
char programname[10]; /* label of the START line, or empty */
// void PASS1()
{
FILE *input,*inter;
input=fopen("input.txt","r");
inter=fopen("inter.txt","w");
printf("LOCATION LABEL\tOPERAND\tOPCODE\n");
printf("_____________________________________");
fgets(line,20,input);
READ_LINE();
if(!strcmp(opcode,"START"))
{
startaddr=atoi(operand);
locctr=startaddr;
strcpy(programname,label);
fprintf(inter,"%s",line);
fgets(line,20,input);
}
else
{
programname[0]='\0';
startaddr=0;
locctr=0;
}
printf("\n %d\t %s\t%s\t %s",locctr,label,opcode,operand);
while(strcmp(line,"END")!=0)
{
READ_LINE();
printf("\n %d\t %s \t%s\t %s",locctr,label,opcode,operand);
if(label[0]!='\0')chk_label();
chk_opcode();
fprintf(inter,"%s %s %s\n",label,opcode,operand);
fgets(line,20,input);
}
printf("\n %d\t\t%s",locctr,line);
fprintf(inter,"%s",line);
fclose(inter);
fclose(input);
}
//=====================================================================================================================================
void PASS2()
{
FILE *inter,*output;
char record[30],part[6],value[5];
int currtxtlen=0,foundopcode,foundoperand,chk,operandaddr,recaddr=0;
inter=fopen("inter.txt","r");
output=fopen("output.txt","w");
fgets(line,20,inter);
READ_LINE();
if(!strcmp(opcode,"START")) fgets(line,20,inter);
printf("\n\nCorresponding Object code is..\n");
printf("\nH^ %s ^ %d ^ %d ",programname,startaddr,length);
fprintf(output,"\nH^ %s ^ %d ^ %d ",programname,startaddr,length);
recaddr=startaddr; record[0]='\0';
while(strcmp(line,"END")!=0)
{
operandaddr=foundoperand=foundopcode=0;
value[0]=part[0]= '\0';
READ_LINE();
for(chk=0;chk<3;chk++)
{
if(!strcmp(opcode,myoptab[chk].code))
{
foundopcode=1;
strcpy(part,myoptab[chk].objcode);
if(operand[0]!='\0')
{
for(chk=0;chk<symcount;chk++)
if(!strcmp(mysymtab[chk].symbol,operand))
{
itoa(mysymtab[chk].addr,value,10);
strcat(part,value);
foundoperand=1;
}
if(!foundoperand)strcat(part,"err");
}
}
}
if(!foundopcode)
{
if(strcmp(opcode,"BYTE")==0 || strcmp(opcode,"WORD")||strcmp(opcode,"RESB"))
{strcat(part,operand);
}}
if((currtxtlen+strlen(part))<=8)
{
strcat(record,"^");
strcat(record,part);
currtxtlen+=strlen(part);
}
else
{
printf("\nT^ %d ^%d %s",recaddr,currtxtlen,record);
fprintf(output,"\nT^ %d ^%d %s",recaddr,currtxtlen,record);
recaddr+=currtxtlen;
currtxtlen=strlen(part);
strcpy(record,part);
}
fgets(line,20,inter);
}
printf("\nT^ %d ^%d %s",recaddr,currtxtlen,record);
fprintf(output,"\nT^ %d ^%d %s",recaddr,currtxtlen,record);
printf("\nE^ %d\n",startaddr);
fprintf(output,"\nE^ %d\n",startaddr);
fclose(inter);
fclose(output);
}
//=================================================
/*
 * READ_LINE - split the global `line` into label, opcode and operand.
 *
 * Fields are separated by single spaces; every space starts a new field,
 * so a leading space yields an empty first field. The number of spaces
 * decides the layout:
 *   0 spaces: opcode
 *   1 space : opcode operand
 *   2 spaces: label opcode operand
 * With more than two spaces all three outputs are left empty.
 *
 * A trailing "\n" or "\r\n" is ignored when present; a line without one is
 * parsed in full. Fields longer than 7 characters are truncated so they fit
 * the 8-byte field buffers.
 */
void READ_LINE()
{
    char buff[8], word1[8] = "", word2[8] = "", word3[8] = "";
    size_t i, j = 0, end;
    int count = 0;

    label[0] = opcode[0] = operand[0] = '\0';

    /* Drop the line terminator only if it is really there. */
    end = strlen(line);
    while (end > 0 && (line[end - 1] == '\n' || line[end - 1] == '\r'))
        end--;

    for (i = 0; i < end; i++)
    {
        if (line[i] != ' ')
        {
            if (j < sizeof buff - 1)
                buff[j++] = line[i];
        }
        else
        {
            /* End of a field: shift the previous fields down by one. */
            buff[j] = '\0';
            strcpy(word3, word2);
            strcpy(word2, word1);
            strcpy(word1, buff);
            j = 0;
            count++;
        }
    }

    /* The text after the last space is the final field. */
    buff[j] = '\0';
    strcpy(word3, word2);
    strcpy(word2, word1);
    strcpy(word1, buff);

    switch (count)
    {
    case 0:
        strcpy(opcode, word1);
        break;
    case 1:
        strcpy(opcode, word2);
        strcpy(operand, word1);
        break;
    case 2:
        strcpy(label, word3);
        strcpy(opcode, word2);
        strcpy(operand, word1);
        break;
    default:
        /* More than three fields: leave label, opcode and operand empty. */
        break;
    }
}
//======================================================
/*
 * chk_label - record the current label in the symbol table.
 *
 * A label that is not yet in mysymtab is added with the current locctr as
 * its address. A label that is already present is marked as a duplicate by
 * setting its address to -1. When the table is full the label is not stored
 * and a message is written to stderr.
 */
void chk_label()
{
    const int capacity = (int)(sizeof mysymtab / sizeof mysymtab[0]);
    int k;

    for (k = 0; k < symcount; k++)
    {
        if (strcmp(label, mysymtab[k].symbol) == 0)
        {
            mysymtab[k].addr = -1;
            return;
        }
    }

    if (symcount >= capacity)
    {
        fprintf(stderr, "\nsymbol table full: label %s ignored\n", label);
        return;
    }

    snprintf(mysymtab[symcount].symbol, sizeof mysymtab[symcount].symbol,
             "%s", label);
    mysymtab[symcount].addr = locctr;
    symcount++;
}
// =====================================================
/* Parses a decimal count from text; anything unparsable or negative counts
   as 0 so the location counter can never move backwards. */
static int chk_opcode_count(const char *text)
{
    long n = strtol(text, NULL, 10);
    return n > 0 ? (int)n : 0;
}

/* Number of bytes a BYTE operand occupies: the character count for C'...',
   one byte per two hex digits for X'...', one byte for any other non-empty
   operand, and 0 for an empty operand. */
static int chk_opcode_byte_length(const char *text)
{
    size_t len = strlen(text);

    if (len == 0)
        return 0;
    if (len >= 3 && text[1] == '\'' && text[len - 1] == '\'')
    {
        size_t body = len - 3;
        if (text[0] == 'C' || text[0] == 'c')
            return (int)body;
        if (text[0] == 'X' || text[0] == 'x')
            return (int)((body + 1) / 2);
    }
    return 1;
}

/*
 * chk_opcode - advance locctr by the size of the current statement.
 *
 * Every mnemonic in myoptab takes 3 bytes. Directives: WORD takes 3 bytes,
 * RESW takes 3 bytes per word of its operand, RESB takes its operand in
 * bytes, and BYTE takes the size of its constant. Any other opcode leaves
 * locctr unchanged.
 */
void chk_opcode()
{
    size_t k;

    for (k = 0; k < sizeof myoptab / sizeof myoptab[0]; k++)
    {
        if (strcmp(opcode, myoptab[k].code) == 0)
        {
            locctr += 3;
            return;
        }
    }

    if (strcmp(opcode, "WORD") == 0)
        locctr += 3;
    else if (strcmp(opcode, "RESW") == 0)
        locctr += 3 * chk_opcode_count(operand);
    else if (strcmp(opcode, "RESB") == 0)
        locctr += chk_opcode_count(operand);
    else if (strcmp(opcode, "BYTE") == 0)
        locctr += chk_opcode_byte_length(operand);
}
//==================================================
/*
 * Runs pass 1, derives the program length from the final location counter,
 * runs pass 2, then waits for input on stdin before exiting so the output
 * stays visible when the program is started from a console window.
 */
int main(void)
{
    PASS1();
    length = locctr - startaddr;
    PASS2();
    getchar(); /* portable replacement for the undeclared conio getch() */
    return 0;
}
/*
Do not forget to create "input.txt", "inter.txt" and "output.txt" before running this program. For the sake of simplicity, only a few opcodes are included in this program, so for better results use input programs that contain only these opcodes.
Here is one sample program (of course, it has no logic):
input.txt
MYPGM START 1000
STA
LOOP1 JMP LOOP2
LDA
LOOP2 JMP LOOP1
RESB 04
LDA
STA
JMP LOOP1
END
STA
LOOP1 JMP LOOP2
LDA
LOOP2 JMP LOOP1
RESB 04
LDA
STA
JMP LOOP1
END
inter.txt
MYPGM START 1000
STA
LOOP1 JMP LOOP2
LDA
LOOP2 JMP LOOP1
RESB 04
LDA
STA
JMP LOOP1
END
output.txt
H^ MYPGM ^ 1000 ^ 25
T^ 1000 ^8 ^02^011009
T^ 1008 ^8 00^011003
T^ 1016 ^6 04^00^02
T^ 1022 ^6 011003
E^ 1000
Happy programming..!!!!!!!
*/
16 comments :
nice work!!!!!!!!!!
@Anonymous Thank you 4 ur feedback..!!!
Nice man really thanking you
@Deepu
Thanks Deepu..!!!! :)
really good one thank u
@hinakoushar a t thank you for your valuable feedback.!! :)
please post a c program that will get an input a >ASM text file ....
the program should store the literals in literal table and symbols in symbol table.........
the program should display the input text file,symbol table and literal table..
plz its urgent......
why I cam run with this?
and the download link is invalid...
Hi! I want to learn writing my own asm. I analyzed your code in detail and like ideas you applied here!
But when I tried to run your program in M$ VS2010 I've got message
"stack around variable 'part' has been corrupted". There's small error in code of 'PASS2':
Look at declaration of 'part':
char record[30],part[6],value[5];
You have 6 cells of array
And look at group of instructions that's a bit farther
if((currtxtlen+strlen(part))<=8)
{
strcat(record,"^");
strcat(record,part);
currtxtlen+=strlen(part);
}
If it turns out that you will write more than 6 chars to 'part' (and less than 9) there will be buffer overflow and you destroy 1 or 2 bytes on the stack depending on number of bytes written to 'part'.
i guess dis prg still has buffer overflow issue....coz i m gettin e address 1025 repeatedly in the output..
@Anonymous
how did you solve this overflow issue?
i need program of multi pass assembler of sic/xe... pls help
need code in c++ for both passes
Nice work..
Goes in infinite loop
Need the code for symbol table in c++
Post a Comment