rdx 모듈 개선 (1)

4.3) rdx 모듈 개선

지금 우리의 목표는 컴파일러를 만드는 것이다. 그러려면 적어도 다음의 문장은 분석해야 한다.

123 *456+var1/ var2

따라서 rdx 모듈은 최소한 이 문장을 분석할 수 있어야 한다. 이전에 작성한 rdx 모듈은 정수 피연산자만 가능했으므로, 이 부분을 StringBuffer 클래스를 이용해 개선할 것이다. 이를 위해 rdx 모듈을 생각보다 많이 리팩토링 해야 한다. 다음은 우리가 어떻게 모듈을 리팩토링 할 것인지를 보이기 위해 소스 파일의 위 부분을 가져온 것이다.

rdx.cpp

#include <iostream>

#include <sstream>

#include <vector>

#include "common.h"

#include "Stack.h"

#include "StringBuffer.h"

const int MAX_EXPR_LEN = 256;

// 식을 계산하고 값을 정수로 반환합니다.

int calculate(const char *expr);

// 연산자의 우선순위를 정수로 반환합니다.

int op_pri(const std::string &op);

// 식을 후위 표기법으로 변환합니다.

static std::vector<std::string> infix_to_postfix(const char *infix);

// 후위 표기법으로 표현된 식을 계산하고 값을 정수로 반환합니다.

int calculate_postfix(const std::vector<std::string> &postfix);

sstream, vector와 StringBuffer 클래스가 소스에 추가되었다. 또한 calculate 함수를 제외한 나머지 세 함수의 원형이 바뀌었다. 이는 StringBuffer 클래스가 토큰을 획득할 때 std::string 형식으로 획득하여 반환하기 때문에, 인자 또한 이러한 형태로 수정하는 것이 사용하기가 편하기 때문이다. 벡터 클래스가 추가됨으로써, 중위 표기식을 분석하여 토큰을 해석한 후에, 후위 표기식에서 다시 토큰을 StringBuffer 클래스를 통해 읽어야 하는 불편함을 없애고 코드를 간결하게 만들었다. 또한 static 키워드를 이용해 외부에서 infix_to_postfix 함수를 호출하지 못하게 하여 코드의 안정성을 높였다. 다음은 수정된 calculate 함수 및 op_pri 함수의 구현이다.

rdx.cpp

int calculate(const char *expr) { // calculate infix expression

// 중위 표기식을 후위 표기식으로 변환한다

std::vector<std::string> postfix = infix_to_postfix(expr);

return calculate_postfix(postfix); // 변환된 후위 표기식을 분석하고 반환한다

}

int op_pri(const std::string &op) { // get operator's priority

int priority = 0;

switch (op[0]) {

case '+': priority = 1; break; case '-': priority = 1; break;

case '*': priority = 2; break; case '/': priority = 2; break;

default: throw Exception("Invalid operator");

}

return priority;

}

이제 중위 표기법으로 표현된 식을 후위 표기법으로 변환하는 infix_to_postfix 함수를 고치자.

rdx.cpp

static std::vector<std::string> infix_to_postfix(const char *infix) {

StringBuffer bin(infix);

Stack<std::string> opStack;

std::vector<std::string> postfix; // 후위 표기식의 토큰을 저장할 벡터

while (bin.is_empty() == false) {

bin.trim();

char ch = bin.peekc();

// 정수라면 획득하고 바로 출력한다 (피연산자)

if (is_digit(ch))

postfix.push_back(bin.get_number());

// 식별자라면 획득하고 바로 출력한다 (피연산자)

else if (is_fnamch(ch))

postfix.push_back(bin.get_identifier());

// 이외의 경우 연산자로 획득한다

else {

std::string op = bin.get_operator();

if (op == "(") // 여는 괄호라면 그냥 넣는다

opStack.push(op);

else if (op == ")") { // 닫는 괄호를 발견한 경우의 처리

if (opStack.is_empty() == false) {

// get operator priority

while (opStack.is_empty() == false) {

std::string top = opStack.top();

if (top == "(") // 여는 괄호를 찾았다면 종료

break;

else

// 우선순위가 낮은 연산자를 스택에서 꺼내

// 후위 표기식에 추가

postfix.push_back(opStack.pop());

}

// 올바른 괄호 쌍이 존재하는지 확인

if (opStack.top() != "(")

throw Exception("Invalid parenthesis");

// 스택에 있는 여는 소괄호를 버린다

opStack.pop();

}

else {

if (opStack.is_empty() == false) {

// get operator priority

int new_pri = op_pri(op);

while (opStack.is_empty() == false) {

std::string top = opStack.top();

if (top == "(") // 여는 괄호를 찾았다면 종료

break;

else if (new_pri <= op_pri(top))

postfix.push_back(opStack.pop());

else

break;

}

opStack.push(op);

}

// 스택에 남은 연산자를 모두 출력한다

while (opStack.is_empty() == false) {

std::string op = opStack.pop();

if (op == "(") // 위에서 처리되지 않은 소괄호가 있다면 예외 처리

throw Exception("Invalid parenthesis");

postfix.push_back(op);

}

return postfix;

}

main에서 테스트하여 이 함수가 잘 동작함을 확인할 수 있다. 당연히 main 함수 내부에서 main_rdx 함수를 호출해야 한다.

rdx.cpp

int main_rdx(void) {

try {

char expression[MAX_EXPR_LEN] = "";

while (true) {

std::cout << "Enter expression: ";

clear_input_buffer();

std::cin.getline(expression, MAX_EXPR_LEN);

if (expression[0] == ';')

break;

std::vector<std::string> &postfix = infix_to_postfix(expression);

for (int i = 0, len = postfix.size(); i < len; ++i) {

std::cout << postfix[i] << ' ';

}

std::cout << " : " << calculate(expression) << std::endl;

}

return 0;

}

catch (Exception &ex) {

std::cerr << ex.c_str() << std::endl;

return 1;

}

이제 calculate_postfix 함수를 수정하면 끝난다. 이를 위해 먼저 다음의 함수를 소개한다.

rdx.cpp

int strtoi(const std::string &str) { // 문자열을 정수로 변환한 값을 반환합니다.

int digit, value = 0;

for (int i = 0, len = str.length(); i < len; ++i) {

digit = str[i] - '0';

value = 10 * value + digit;

}

return value;

}

std::string 문자열을 정수로 변환하는 함수다. 토큰을 모두 std::string으로 저장했으니 이러한 변환은 반드시 필요하므로 구현했다. 그러면 이제 calculate_postfix의 구현을 보이겠다. 그런데 사실 이 코드에는 아직 구현하지 않은 부분이 있다. 어떤 부분일까?

rdx.cpp

int calculate_postfix(const std::vector<std::string> &postfix) {

int value;

Stack<int> paramStack;

for (int i = 0, len = postfix.size(); i < len; ++i) {

const std::string &token = postfix[i];

if (is_digit(token[0])) { // 정수라면 변환해서 푸시

value = strtoi(token);

paramStack.push(value);

}

else if (is_fnamch(token[0])) { // 식별자라면 값을 가져와서 푸시

// value = get_identifier_value(token);

value = -1;

paramStack.push(value);

}

else { // 이외의 경우 연산자로 처리한다

const std::string &op = token;

// 스택에서 두 개의 피연산자를 꺼낸다

int right = paramStack.pop();

int left = paramStack.pop();

// 획득한 연산자로 연산한다

switch (op[0]) {

case '+': value = left + right; break;

case '-': value = left - right; break;

case '*': value = left * right; break;

case '/': value = left / right; break;

default: throw Exception("Invalid operator");

}

// 연산 결과를 다시 스택에 넣는다

paramStack.push(value);

}

if (paramStack.count() != 1) // 스택에 남은 피연산자가 1개가 아니면 예외

throw Exception("Unhandled operand found");

return paramStack.pop(); // 하나 남은 피연산자를 반환한다

}

사실 바로 보이는데, 식별자에서 값을 가져오는 부분이 구현되지 않았다. 왜 그랬을까?

식별자에서 값을 가져오려면, 먼저 식별자가 정의되어있어야 한다. 변수를 정의하지 않고 사용할 수 없는 것처럼 말이다. 그러려면 식별자를 정의하는 표를 별도로 가지고 있어야 한다. 이 문제를 설명하기 위해 아직 식별자에서 값을 가져오는 부분을 구현하지 않았다. 우리는 식별자 표를 만든 다음 위에서 주석 처리되어있는get_identifier_value 함수를 구현할 것이다.

그런데 생각해보자. 식별자 표는 어디에 정의해야 할까? rdx인가, dcl인가? rdx 모듈은 식을 계산할 때 식별자 표로부터 정보를 가져와야 한다. dcl 모듈은 선언을 분석한 후 획득한 식별자 정보를 식별자 표에 넣어야 한다. 필자의 프로그래밍 경험으로는, 두 모듈이 하나의 자료에 접근하는 경우에는 자료를 독립적으로 만들어놓고 접근자,설정자 함수를 이용해 자료를 주고받는 형태가 가장 나았다. 따라서 이 예제에서도 식별자 표 정보를 갖고 있는 새로운 모듈을 만들고, rdx와 dcl이 함수를 통해 이 모듈과 자료를 주고받도록 할 것이다. 이 새로운 모듈의 이름은 식별자 표(table)라는 뜻으로 tbl이라고 하겠다.

다음과 같이 tbl 모듈의 식별자 표 Table을 구현할 수 있다. 먼저 헤더를 보자.

Table.h

#ifndef __IDENTIFIER_TABLE_H__

#define __IDENTIFIER_TABLE_H__

#include "common.h"

#include <string>

#include <map>

class Table {

static Table *_instance; // 싱글톤 객체를 가리키는 정적 필드

std::map<std::string, std::string> _table; // 실제 식별자 표 객체

private: // tbl을 싱글톤 객체로 만들기 위해 생성자를 숨긴다

explicit Table();

~Table();

public:

// tbl 싱글톤 객체의 인스턴스를 가져옵니다.

static Table *instance();

// 식별자에 대한 접근자, 설정자 함수입니다.

std::string &get(const std::string &identifier);

void set(const std::string &identifier, const std::string &value);

};

#endif

싱글톤이라는 단어를 처음 듣는 독자가 있을 수도 있다. 싱글톤(singleton)이란 프로그램에 하나 이상 생성되지 않는 유일한 객체를 말하며, 이렇게 싱글톤을 이용하여 프로그램을 작성하는 설계 방식을 싱글톤 디자인 패턴이라고 한다. 식별자 표는 당장은 프로그램에 하나 이상 존재할 이유가 따로 없는데, 왜냐하면 표가 두 개 이상이 되면 식별자를 모든 표마다 뒤져서 찾아내야 하는 등 여러모로 번거로워지기 때문이다. 따라서 지금은 tbl 모듈은 싱글톤 객체로만 사용하겠다(당장, 지금 등 이 시점을 강조하는 이유는 이 모듈이 나중에 리팩토링 될 것이기 때문이다).

다음은 tbl 싱글톤 객체의 메서드를 구현한 것이다.

tbl.cpp

#include "Table.h"

Table *Table::_instance = nullptr;

Table::Table() {}

Table::~Table() {}

Table *Table::instance() {

return (_instance ? _instance : (_instance = new Table()));

}

std::string &Table::get(const std::string &identifier) {

try {

return _table[identifier];

}

catch (...) {

throw Exception("식별자에 대한 값을 가져올 수 없습니다.");

}

void Table::set(const std::string &identifier, const std::string &value) {

_table[identifier] = value;

}

컴파일러를 만드는 문서이니만큼 싱글톤의 구현에 관해 더 이상 설명하지 않겠다. 구현은 위와 같이 아주 간단하며, 이를 이용해 rdx 모듈을 완성할 수 있다.

rdx.cpp

...

#include "Table.h"

...

int calculate_postfix(const std::vector<std::string> &postfix) {

...

else if (is_fnamch(token[0])) { // 식별자라면 값을 가져와서 푸시

std::string ival = Table::instance()->get(token); // 키에 대한 값 획득

value = strtoi(ival); // 획득한 값을 정수로 변환

paramStack.push(value); // 피연산자 스택에 푸시

}

...

}

다음과 같이 테스트가 정상적으로 동작함을 확인할 수 있다.

현재 버전	2015-06-07 12:30:30
선택 버전

rdx 모듈 개선 (1)

댓글