Two Pass Assembler

WHAT IS A SINGLE PASS ASSEMBLER?

It is an assembler that generally generates the object code directly in memory for immediate execution. It parses through your source code only once and then it is done. Now let us see how a single-pass assembler deals with a label it has not seen yet. While on its way, if the assembler encounters an undefined label, it puts it into a symbol table along with the address where the undefined symbol’s value has to be placed once the symbol is defined later in the source.

WHY DO WE NEED A TWO-PASS ASSEMBLER?

As explained, the one-pass assembler cannot resolve forward references of data symbols. It requires all data symbols to be defined prior to being used. A two-pass assembler solves this dilemma by devoting one pass exclusively to resolving all (data/label) forward references and then generating object code with no hassles in the next pass. If a data symbol depends on another, and that one depends on yet another, the assembler resolves this recursively. Explaining that as well would make this post too big; read this ppt for more details.

1. MOT (machine opcode table)
2. POT (pseudo opcode table)
3. Base table (stores the value of the base register)
4. LC (location counter)

Design of Two pass assembler:

The tasks performed by the passes of a two-pass assembler are as follows:

Required Data structures

1) Opcode Table: The assembler does not need to enter information into this table. The table, already filled in, is available to the assembler.

#include "Assembler.h"
#include <fstream>
#include <sstream>
#include "MOT.h"
#include<iomanip>

/**
 * Loads the entire file at `path` and returns its contents as one string.
 *
 * If the file cannot be opened, a message is written to cerr and the
 * process terminates with EXIT_FAILURE; the function does not return
 * in that case.
 *
 * @param path  Location of the file to read.
 * @return      Everything read from the file.
 */
string Assembler::readFileIntoString(const string& path) {
    ifstream source(path);
    if (!source.is_open()) {
        cerr << "Could not open the file - '"
             << path << "'" << endl;
        exit(EXIT_FAILURE);
    }

    // Drain the file's stream buffer into an in-memory stream in one go.
    ostringstream contents;
    contents << source.rdbuf();
    return contents.str();
}

/**
 * Returns the number of characters in `str`.
 *
 * Uses the string's own size instead of scanning for a '\0' terminator,
 * so text containing an embedded NUL character is no longer cut short
 * and the call itself does no per-character work.
 *
 * @param str  String to measure.
 * @return     Character count of `str`.
 */
int Assembler::len(string str)
{
    return static_cast<int>(str.size());
}

/**
 * Cuts `str` at every occurrence of `seperator` and appends each piece,
 * in order, to the member container `lines`.
 *
 * The text after the last separator (possibly empty) is appended as the
 * final piece, and two adjacent separators produce an empty piece.
 * Existing entries of `lines` are kept.
 *
 * @param str        Text to split.
 * @param seperator  Delimiter character; it is not included in the pieces.
 */
void Assembler::split(string str, char seperator)
{
    // Measure once: len() takes its argument by value, so calling it in the
    // loop condition copied the whole string on every iteration.
    const int length = len(str);
    int startIndex = 0;

    for (int i = 0; i <= length; i++) {
        // i == length is the virtual separator that flushes the last piece.
        if (i == length || str[i] == seperator) {
            lines.push_back(str.substr(startIndex, i - startIndex));
            startIndex = i + 1;
        }
    }
}

void Assembler::subStringGenerator() {
if (code.length() != 0) {

string str = "#include <stdio.h>\nint main(){ int a,b,c;\n}";
char seperator = '\n'; // space
str = code;
split(str, seperator);
//cout << " The split string is: ";

for (int j = 0; j < lines.size(); j++) {

//cout << "\n i : " << j << " " << lines[j];
}

//Write here //Calling method tokenizer
}
else {
cout << "Something Went Wrong inside the method SubStringGenerator Method";

}
for (int i = 0; i < lines.size(); i++) {

lines[i] = subStringPreprocessor(lines[i]);

}

}

/**
 * Tokenises one statement: `temp` is emptied, `s` is split on single
 * spaces into `temp` via split_literals(), and every token is trimmed
 * with subStringPreprocessor().
 *
 * An empty `s` leaves `temp` empty.
 *
 * @param s  Statement text to break into tokens.
 */
void Assembler::subStringSplitter(string s)
{
    temp.clear();

    if (s.length() != 0) {
        split_literals(s, ' ');
    }

    for (auto& token : temp) {
        token = subStringPreprocessor(token);
    }
}

/**
 * Returns `s` with whitespace removed from both ends.
 *
 * @param s  Text to clean up.
 * @return   `s` after rtrim() followed by ltrim().
 */
string Assembler::subStringPreprocessor(string s)
{
    return ltrim(rtrim(s));
}

/**
 * Normalises a label token: the first ':' (if there is one) is removed
 * and surrounding whitespace is trimmed away.
 *
 * @param label  Raw token, e.g. a label followed by a colon.
 * @return       The token without its first ':' and without outer whitespace.
 */
string Assembler::label_Preprocessor(string label)
{
    const size_t colonPos = label.find(":");
    if (colonPos != string::npos) {
        label.erase(colonPos, 1);
    }
    return rtrim(ltrim(label));
}

/**
 * Removes leading whitespace (space, \n, \r, \t, \f, \v) from `s`.
 *
 * @param s  Input text.
 * @return   `s` from its first non-whitespace character onward, or an
 *           empty string when `s` holds nothing but whitespace.
 */
string Assembler::ltrim(const string& s)
{
    const string blanks = " \n\r\t\f\v";
    const size_t firstKept = s.find_first_not_of(blanks);
    if (firstKept == std::string::npos) {
        return "";
    }
    return s.substr(firstKept);
}

/**
 * Removes trailing whitespace (space, \n, \r, \t, \f, \v) from `s`.
 *
 * @param s  Input text.
 * @return   `s` up to and including its last non-whitespace character, or
 *           an empty string when `s` holds nothing but whitespace.
 */
string Assembler::rtrim(const std::string& s)
{
    const string blanks = " \n\r\t\f\v";
    const size_t lastKept = s.find_last_not_of(blanks);
    if (lastKept == std::string::npos) {
        return "";
    }
    return s.substr(0, lastKept + 1);
}

/**
 * Runs the assembly stages in order: the first scan over the source
 * lines, address allocation for symbols still lacking one, a dump of the
 * symbol table, and finally the second scan.
 */
void Assembler::SymbolTableGeneratorMain()
{
    SymbolTableGenrationPart1();    // first scan over `lines`
    SymbolTableAddressAllocator();  // fill in addresses still marked -999
    table.printTable();             // show the table between the two scans
    SymbolTableGenrationPart2();    // second scan over `lines`
}

// First scan over the source lines: tokenises every line, records tokens
// that are not in the machine opcode table (`mot`) as symbols in `table`,
// and assigns addresses of the form i * 2 (i = zero-based line index) to
// labels and, after a line containing "ENDP" has been seen, to symbols whose
// address is still -999. Progress is logged to cout throughout.
// NOTE(review): -999 looks like the "no address yet" marker, presumably the
// value append_row() gives a new symbol - confirm in the symbol table class.
// NOTE(review): i * 2 presumably assumes two address units per source line -
// TODO confirm.
void Assembler::SymbolTableGenrationPart1()
{
// flag becomes 11 once a line containing "ENDP" has been processed.
int flag = 99;
// Only assigned and read inside the flag == 11 branch below.
int index;
cout << "Logs1 : par1 *******************************************\n";
for (int i = 0; i < lines.size(); i++) {

//cout << lines[i] << "-->";
subStringSplitter(lines[i]); // Split the statement into tokens; the result lands in `temp`

for (int j = 0; j < temp.size(); j++) {
cout << "\nTemp[" << j << "] = "<< temp[j] <<"\n";
// A token containing ':' is treated as a label definition.
if (temp[j].find(":") != string::npos) {

// Only a label whose cleaned-up name is already in the table gets its
// address set here; an unknown label is left for the code below.
if (table.search(label_Preprocessor(temp[j])) != -1) {

table.address[table.search(label_Preprocessor(temp[j]))] = i * 2;
//cout << "\n Addres: " << table.address[table.search(label_Preprocessor(temp[j]))] << "Symbol : " << table.symbol[table.search(label_Preprocessor(temp[j]))];
cout << "label found...\n";
}

}
// The lookups below use the raw token, so a label token is searched for
// (and possibly appended) with its ':' still attached.
if (-1 == mot.search(temp[j])) { // Check whether the token is not in the MOT

if (table.search(temp[j]) == -1) { // Check whether the token is not already in the symbol table

table.append_row(temp[j]); // The token is new, so add it to the symbol table
cout << "Append the row to table = "<<temp[j];

}
else {
cout << "Literal present in symbol table \n";
}

}
else {
cout << "Literal present in MOT";
}
// Only reached for lines after the one containing "ENDP", because flag is
// updated at the bottom of the outer loop body.
if (flag == 11) {

if (table.search(temp[j]) != -1) {

index = table.search(temp[j]);
// Give a still-unaddressed symbol the address of the current line and
// move the location counter there.
if (table.address[index] == -999) {
cout << table.symbol[index] << " " << i * 2 << " \n";
table.address[index] = i * 2;
lc = i * 2;
}
}

}
}
// Checked after the line's tokens were handled, so the ENDP line itself is
// still processed with the previous flag value.
if (lines[i].find("ENDP") != string::npos) {
lc = i * 2;
flag = 11;
}

}


}

/**
 * Assigns an address to every symbol whose address is still -999.
 *
 * The symbol at table position p receives (lc + 1) + p + 1, so assigned
 * addresses follow the symbols' positions in the table. Symbols that
 * already have an address are left alone.
 */
void Assembler::SymbolTableAddressAllocator() {
    const int base = lc + 1;
    int slot = 0;
    while (slot < table.symbol.size()) {
        const bool unassigned = (table.address[slot] == -999);
        if (unassigned) {
            table.address[slot] = base + slot + 1;
        }
        slot++;
    }
}

/**
 * Assembles the program stored in file `s`.
 *
 * Reads the file into `code`, echoes it to cout, splits it into lines,
 * runs the symbol-table stages and finally prints the accumulated
 * `output` under an "OUTPUT FILE" banner.
 *
 * @param s  Path of the assembly source file.
 */
Assembler::Assembler(string s)
{
    code = readFileIntoString(s);
    cout << code << endl;

    subStringGenerator();
    cout << "\n";

    SymbolTableGeneratorMain();
    cout << "\n\n************ OUTPUT FILE **************\n" << output;
}



string Assembler::label_Preprocessor_SymbolTableGenerator_Part2_Helper(string s)
{
if (s.find(":") != string::npos) {

s = s.substr(s.find(":"),s.length()-1);
return s;
}
return s;

}
/**
 * Second scan over the source lines: translates tokens into numbers.
 *
 * Each line is echoed to cout, passed through
 * label_Preprocessor_SymbolTableGenerator_Part2_Helper() (which removes the
 * label text preceding a ':'; the result is stored back into `lines`) and
 * tokenised into `temp`. For every token:
 *   - if it is found in `mot` and its opcode is not -1, the opcode is
 *     printed and appended to `output` followed by a space;
 *   - otherwise, if it is found in `table`, its address is printed and
 *     appended to `output` followed by " \n".
 * Tokens matching neither are skipped. The scan stops after the first line
 * containing "ENDP", at which point `lc` is set to that line's index * 2.
 */
void Assembler::SymbolTableGenrationPart2()
{
    for (int i = 0; i < lines.size(); i++) {
        // setw(10) sets the field width for the next item written to cout.
        cout << lines[i] << setw(10);
        lines[i] = label_Preprocessor_SymbolTableGenerator_Part2_Helper(lines[i]);
        subStringSplitter(lines[i]); // Split the statement into tokens (fills `temp`)

        for (int j = 0; j < temp.size(); j++) {
            if (-1 != mot.search(temp[j])) {
                // Known mnemonic; entries whose opcode is -1 emit nothing.
                if (mot.opcode[mot.search(temp[j])] != -1) {
                    cout << mot.opcode[mot.search(temp[j])] << " ";
                    output.append(to_string(mot.opcode[mot.search(temp[j])]) + " ");
                }
            }
            else if (table.search(temp[j]) != -1) {
                // Known symbol: emit its address and end the output line.
                cout << table.address[table.search(temp[j])] << " ";
                output.append(to_string(table.address[table.search(temp[j])]) + " \n");
            }
        }

        if (lines[i].find("ENDP") != string::npos) {
            lc = i * 2;
            break;
        }
        cout << "\n";
    }
}

/**
 * Cuts `str` at every occurrence of `seperator` and appends each piece,
 * in order, to the member container `temp`.
 *
 * The text after the last separator (possibly empty) is appended as the
 * final piece, and two adjacent separators produce an empty piece.
 * Existing entries of `temp` are kept; callers clear it beforehand if
 * they need a fresh list.
 *
 * @param str        Text to split.
 * @param seperator  Delimiter character; it is not included in the pieces.
 */
void Assembler::split_literals(string str, char seperator)
{
    // Measure once: len() takes its argument by value, so calling it in the
    // loop condition copied the whole string on every iteration.
    const int length = len(str);
    int startIndex = 0;

    for (int i = 0; i <= length; i++) {
        // i == length is the virtual separator that flushes the last piece.
        if (i == length || str[i] == seperator) {
            temp.push_back(str.substr(startIndex, i - startIndex));
            startIndex = i + 1;
        }
    }
}

DIFFERENCE BETWEEN ONE PASS AND TWO PASS ASSEMBLER

One Pass Assembler: The one-pass assembler passes over the file once; that is, it collects all the information in one loop. It collects labels and also resolves future references. There is a major problem with future (forward) referencing. It assembles the assembly code in one pass. It creates an intermediate file which acts as an input to the two-pass assembler.

Advantages of Two Pass Assembler

One of the main advantages of a two-pass assembler is that, in many cases, the first pass generates an output file which is then read by the second pass.

References

https://www.geeksforgeeks.org/introduction-of-assembler/

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store