Phones-to-Words Challenge I: Emacs Lisp as an alternative to Java

When I stumbled upon this old Lisp as an alternative to Java study, I felt I wanted to do it in Emacs Lisp. And so I did.

I also wanted to do it in Bash and compare these two fairly different animals. Since Emacs Lisp seemed, in principle, like a clearly superior choice for this, both in speed and in libraries to deal with the problem, I started with it.

Excluding commented and empty lines, the size of Lispers' versions ranged from 50 to 181 lines of code, with median 134. My Emacs Lisp version, after excluding these plus the docstrings, ended up with 72 LoC.

Here it goes:

;;; phones2words.el --- Convert phone numbers to words   -*- lexical-binding: t -*-
;;
;; SPDX-FileCopyrightText:  © flandrew
;; SPDX-License-Identifier: GPL-3.0-or-later
;;
;;; Commentary:
;;
;; Emacs Lisp solution to: "Convert phone numbers to words",
;;                         a programming challenge by Ron Garret.
;;
;;   Original instructions: <https://flownet.com/ron/papers/lisp-java/>
;;
;;   To run it, put the input files in a subfolder ./in and eval this file:
;;                       (emacs-lisp-byte-compile-and-load)
;;
;;   Then:           (phones2words-convert "in/input.txt" "in/dictionary.txt")
;;     or simply:    (phones2words-convert)          ; defaults to the above
;;     for umlauts:  (phones2words-convert nil nil 'umlautify)
;;     to pass other paths/files and keep umlauts, e.g.:
;;        (phones2words-convert "in/sample-input" "in/sample-dict" 'umlautify)


;;; Code:
;;;; Libraries

(mapc #'require '(cl-extra dash s f ht)) ; cl-extra is used for a ‘cl-mapcan’.


;;;; Functions

(defun alpha-to-digit (str)
  "Convert letters to digits in string STR according to the given rules."
  (let ((result (downcase str))
        (rules  '(("[e]"    . "0")
                  ("[jnq]"  . "1") ("[rwx]"  . "2") ("[dsy]" . "3")
                  ("[ft]"   . "4") ("[aäm]"  . "5") ("[civ]" . "6")
                  ("[bkuü]" . "7") ("[loöp]" . "8") ("[ghz]" . "9"))))
    (pcase-dolist (`(,from . ,to) rules)
      (setq result (replace-regexp-in-string from to result)))
    result))

(defun umlautify (str &optional reverse)
  "Replace [AOUaou]\" with properly-umlauted characters in string STR.
When REVERSE is non-nil, do the opposite."
  (let* ((uq->ü  '(("A\"" . "Ä") ("O\"" . "Ö") ("U\"" . "Ü")
                   ("a\"" . "ä") ("o\"" . "ö") ("u\"" . "ü")))
         (ü->uq  (--map (cons (cdr it) (car it)) uq->ü)))
    (s-replace-all (if reverse ü->uq uq->ü) str)))

(defun wordfile-to-ht (wordfile)
  "From WORDFILE create hashtable: key=digits-as-string, value=list-of-words."
  (let* ((wordstrg (->> wordfile     f-read     umlautify))
         (wordlist (->> wordstrg                  s-lines))
         (keyslist (->> wordstrg  alpha-to-digit  s-lines))
         (hashtble (ht-create)))
    ;; Since different words translate to the same "digits-as-string" key, the
    ;; values will be lists of keys.
    (--zip-with (ht-set! hashtble it `(,other ,@(ht-get hashtble it)))
                keyslist wordlist)
    hashtble))

(defun phone-to-list (hs phone-str &optional wasdig)
  "Given a hashtable HS and a PHONE-STR, return nested list of results.
If WASDIG is non-nil, previous number was a digit."
  (let* ((phone    (replace-regexp-in-string "[^0-9]" "" phone-str))
         (len      (length phone))
         (sizes    (number-sequence len 1 -1))
         (lsubstrs (--map (substring phone 0 it) sizes))
         (lmatches (-flatten (--map (ht-get hs it) lsubstrs))))
    ;; Use the digit if no "left-side subnumber" worked.
    (if (and (not wasdig) (null lmatches))
        (setq lmatches `(,(substring phone 0 1))
              wasdig   t)
      (setq wasdig nil))
    ;; Call function recursively over the "right-side" of every match.
    (-non-nil
     (-map (lambda (lmatch)
             (let* ((sz    (length lmatch))
                    (whole (= sz len))
                    (rest  (unless whole
                             (phone-to-list hs (substring phone sz) wasdig))))
               (cond (whole                  `(,lmatch))
                     ((null (-flatten rest)) `())
                     (t    (cons lmatch (apply #'append
                                               (--map (pcase (length it)
                                                        (0 `())
                                                        (1 `(,@it))
                                                        (_ `(,it)))
                                                      rest)))))))
           lmatches))))

(defun all-paths (tree)
  "Enumerate all root-to-leaves paths of a TREE."
  (if (atom tree) `((,tree))
    (cl-mapcan (lambda (branch)
                 (--map (cons (car tree) it) (all-paths branch)))
               (cdr tree))))

(defun phone-to-results (hs phone-str)
  "Given a hashtable HS and a PHONE-STR, return results as string."
  (let ((res (--map (if (cdr it) it (car it))
                    (phone-to-list hs phone-str))))
    (unless (null (-flatten res))
      (->> (cons (concat phone-str ":") res)   all-paths
           (mapcar (lambda (it) (s-join " " it)))   -flatten   (s-join "\n")))))

(defun all-phones-to-results (&optional phones dict umlautify)
  "Given a PHONES file and a DICT file, return the results as string.
If UMLAUTIFY is non-nil, render the results with proper umlauts."
  (let* ((pwd         (f-dirname (f-this-file)))
         (phones-file (concat pwd "/" (or phones "in/input.txt")))
         (dict-file   (concat pwd "/" (or dict   "in/dictionary.txt")))
         (hash-tbl    (wordfile-to-ht dict-file))
         (phones-list (->> phones-file  f-read s-lines (remove "")))
         (results     (->> phones-list  (--map (phone-to-results hash-tbl it))
                           -non-nil     (s-join "\n"))))
    (unless umlautify (setq results (umlautify results :reverse)))
    (let ((buf (get-buffer-create "*Phones to Words*")))
      (switch-to-buffer buf)   (erase-buffer)   (insert results))))


;;;; Aliases

;;;###autoload
(defalias 'phones2words-convert #'all-phones-to-results)

;;(phones2words-convert)


;;;; Wrapping up

(provide 'phones2words)

;; Local Variables:
;; coding:                     utf-8
;; indent-tabs-mode:           nil
;; sentence-end-double-space:  nil
;; outline-regexp:             ";;;;* "
;; End:

;;; phones2words ends here

(If you have Emacs Lisp fluency and spot any potential improvements while reading the above, please tell us. Anything from better algorithm choices, alternative approaches, and libraries or functions that could have been used for speed, robustness or conciseness — all are welcome.

I'd be particularly interested in what higher-level functions and approaches could have simplified my solution, if any. What patterns did I miss that made me write in five lines where one would have sufficed?)

Being done here, I tackled it in Bash. Oh my.