字句解析#2

字句解析を続ける。

ファイルから読み出した文字列(1行)に、トークンの種類と行番号を加えたリスト (種類 文字列 行番号) をトークンとする。

;; Regular expressions used by `type-scan' to classify one input line.
;; In these string literals "\\\(" reads as the two characters \( (an
;; escaped backslash followed by an escaped paren), i.e. it opens a
;; regexp capture group; "\\\)" closes it.
;;
;; MGH_* patterns: a line starting with "0C" plus one arbitrary
;; character, then six captured 12-character fields, one captured
;; 8-character field, 12 skipped characters, and captured fields of
;; 4, 3 and 3 characters.  The three patterns differ only in the literal
;; two-digit code that follows: "10", "11" or "20".
(defvar MGH_vlt "^0C.\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(........\\\)............\\\(....\\\)\\\(...\\\)\\\(...\\\)10.*")
(defvar MGH_flt "^0C.\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(........\\\)............\\\(....\\\)\\\(...\\\)\\\(...\\\)11.*")
(defvar MGH_ff  "^0C.\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(............\\\)\\\(........\\\)............\\\(....\\\)\\\(...\\\)\\\(...\\\)20.*")
;; "9D" + five captured digits, with a \xfe character somewhere later on
;; the line.  Must be tried before MGD_ffmtTR and MGD_termTR, which also
;; match such a line.
(defvar MGD_monoTR "^9D\\\([0-9][0-9][0-9][0-9][0-9]\\\).*\xfe.*")
;; "1D" + five captured digits.  Must be tried before MGD_extrTR, which
;; also matches any line starting with "1".
(defvar MGD_initTR "^1D\\\([0-9][0-9][0-9][0-9][0-9]\\\).*")
;; Any line whose first character is a digit 1-8.
(defvar MGD_extrTR "^[1-8].*")
;; Any line whose first character is "9" (catch-all for "9" lines).
(defvar MGD_termTR "^9.*")
;; "9D" + five captured digits (no \xfe requirement).
(defvar MGD_ffmtTR "^9D\\\([0-9][0-9][0-9][0-9][0-9]\\\).*")
;; "0E" + five captured digits.
(defvar MGT        "^0E\\\([0-9][0-9][0-9][0-9][0-9]\\\).*")
;; Bound to nil.  Note that `type-scan' returns the symbol `EOF' itself;
;; it does not read this variable's value.
(defvar EOF        nil)

(defun new-lexer (filename)
  "Create a lexer for FILENAME.
The lexer is a cons cell (BUFFER . COUNT): BUFFER is the buffer
visiting FILENAME (obtained without selecting it) and COUNT is the
number of tokens handed out so far, initially 0."
  (let ((source-buffer (find-file-noselect filename))
        (initial-count 0))
    (cons source-buffer initial-count)))

(defun rewind-lexer (l)
  "Reset lexer L to its starting state.
Calls `seq-rewind' on the lexer's buffer (the car of L) and then
resets the token counter (the cdr of L) to 0.  Returns 0."
  (let ((source-buffer (car l)))
    (seq-rewind source-buffer)
    ;; `setcdr' returns the new cdr, so the function's value is 0.
    (setcdr l 0)))

(defun getToken-lexer (l)
  "Read the next token from lexer L.
L is a cons cell (BUFFER . COUNT) as built by `new-lexer'.  One item
is read from BUFFER with `seq-read', classified with `type-scan', and
COUNT is incremented in place.

Returns a list (TYPE STRING LINENO), where TYPE is the symbol returned
by `type-scan', STRING is the value returned by `seq-read', and LINENO
is the updated counter (1 for the first token)."
  ;; All intermediates are bound locally.  In particular the token type
  ;; is no longer assigned to a global variable as a side effect.
  (let* ((lineno (1+ (cdr l)))
         (bin (seq-read (car l)))
         (tokentype (type-scan bin)))
    (setcdr l lineno)
    (list tokentype bin lineno)))

(defun type-scan (s)
  "Return a symbol naming the token type of record string S.
A nil S yields `EOF'.  Otherwise S is matched against the pattern
variables below in the order listed, and the name of the first
matching pattern is returned; `Invalid' is returned when none match.
The order is significant because several patterns overlap (for
example `MGD_initTR' lines also match `MGD_extrTR')."
  (cond
   ((null s)                       'EOF)
   ;; Header records.
   ((string-match MGH_vlt s)       'MGH_vlt)
   ((string-match MGH_flt s)       'MGH_flt)
   ((string-match MGH_ff s)        'MGH_ff)
   ;; Data records: most specific patterns first.
   ((string-match MGD_monoTR s)    'MGD_monoTR)
   ((string-match MGD_initTR s)    'MGD_initTR)
   ((string-match MGD_extrTR s)    'MGD_extrTR)
   ((string-match MGD_ffmtTR s)    'MGD_ffmtTR)
   ((string-match MGT s)           'MGT)
   ;; Catch-all for remaining lines starting with "9".
   ((string-match MGD_termTR s)    'MGD_termTR)
   (t                              'Invalid)))

;; Example session: build a lexer over ~/test.cii and keep it in the
;; global variable `l' for the calls that follow.
(setq l (new-lexer "~/test.cii"))
0

;; Read the first token.  The value printed on the next line has the
;; form (TYPE STRING LINENO).
(getToken-lexer l)
(MGH_flt "0C0111111111111222222222222333333333333444444444444555555555555666666666666EIAJ0502777777777777888899900011------------------------------------------------------------------------------------------------------------------------------------------------" 1)

;; Read the next token; the counter stored in `l' advances to 2.
(getToken-lexer l)
(MGD_monoTR "9D00001^@\371-----22222233444444444444555555555555666666666666777777777777888899900011-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\376 " 2)

ここで、トークンの種類を判定しているtype-scanに注目しよう。condの各節が「パターンにマッチしたら、そのパターン名のシンボルを返す」という同一の構造になっている。このような場合は、パターン名をリストにまとめてdolistで順に試し、最初にマッチした時点でcatch, throwを使って抜けるようにすると簡潔になる。

;; Names of the pattern variables that `type-scan' tries, in priority
;; order.  More specific patterns must come before the broader ones that
;; also match the same lines (MGD_initTR before MGD_extrTR; MGD_monoTR
;; before MGD_ffmtTR before MGD_termTR).
(setq token_list
      (list 'MGH_vlt 'MGH_flt 'MGH_ff
            'MGD_monoTR 'MGD_initTR 'MGD_extrTR
            'MGD_ffmtTR 'MGD_termTR 'MGT))
(defun type-scan (s)
  "Return a symbol naming the token type of record string S.
A nil S yields `EOF'.  Otherwise each symbol in `token_list' is taken
in order, its value is used as a regexp, and the first symbol whose
regexp matches S is returned.  `Invalid' is returned when none match."
  (cond ((null s)
         'EOF)
        ;; The matching symbol itself is thrown, so no variable shared
        ;; between calls is needed.  When nothing matches, `dolist'
        ;; returns nil, `catch' returns nil, and control falls through
        ;; to the `Invalid' clause.
        ((catch 'found
           (dolist (type token_list)
             (when (string-match (symbol-value type) s)
               (throw 'found type)))))
        (t
         'Invalid)))

Common Lisp のloopマクロ(Emacsではclパッケージが提供する)を使うと、より簡潔になる。最後のInvalidは、すべての文字列にマッチする (defvar Invalid "^.*") を定義してtoken_listの末尾に加え、番兵としてもよいが、通常は例外を出すところである。

(defun type-scan (s)
  "Return a symbol naming the token type of record string S.
A nil S yields `EOF'.  Otherwise the symbols in `token_list' are tried
in order and the first one whose value, used as a regexp, matches S is
returned.  `Invalid' is returned when none match.

Uses the `loop' macro, which this file takes from the cl package."
  (if (null s)
      'EOF
    (loop for type in token_list
          thereis (when (string-match (symbol-value type) s) type)
          ;; A bare form after `finally' is evaluated only for effect;
          ;; `finally return' is needed to make it the loop's value
          ;; when no pattern matched.
          finally return 'Invalid)))