;; -*- mode: scheme; coding: utf-8 -*-
;; SPDX-License-Identifier: AGPL-3.0-or-later
;; Loko Scheme - an R6RS Scheme compiler
;; Copyright © 2019, 2020 Göran Weinholt

;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU Affero General Public License for more details.

;; You should have received a copy of the GNU Affero General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
#!r6rs

;;; Standard library for symbols

(library (loko runtime symbols)
  (export
    symbol? symbol->string symbol=? string->symbol

    symbol-hash $symbol-hash

    gensym gensym? gensym->unique-string gensym-prefix
    *unbound-hack*
    symbol-value set-symbol-value!
    $gensym-generate-names!)
  (import
    (except (rnrs) symbol? symbol->string symbol=? string->symbol symbol-hash)
    (prefix (rnrs) sys:)
    (rnrs mutable-strings)
    (only (loko runtime utils) bytevector-hash string-hash*)
    (loko runtime context)
    (loko system $primitives))

(define *unbound-hack* (vector 'unbound))

;; Slots in the symbol object. The box header value lowest bit is 1
;; for gensyms from the bootstrap. The second lowest bit is 1 for
;; gensyms. The hash is the string-hash* of the name for regular
;; symbols and the string-hash* of the unique string for gensyms.
(define SYMBOL-NAME      0)
(define SYMBOL-HASH      1)
(define SYMBOL-UNIQUE    2)             ;gensyms only
(define SYMBOL-VALUE/IDX 3)             ;gemsyms only

(define LENGTH-SYMBOL 2)
(define LENGTH-GENSYM 4)

(define (symbol? x)
  (or ($immsym? x)
      (and ($box? x)
           ($box-header-type-eq? ($box-type x) 'symbol))))

(define (symbol->string v)
  (define alphabet     "abcdefghijklmnopqrstuvwxyz-/<=>")
  (define end-alphabet "acdefghklmnopqrstvxy!*+-/08<=>?")
  (cond
    (($immsym? v)
     (let* ((s ($immsym->fixnum v))
            (len (fxdiv (fx+ (fxlength s) 4) 5))
            (str (make-string len)))
       (if (eqv? len 0)
           str
           (let lp ((s s) (i 0))
             (let ((s (fxarithmetic-shift-right s 5))
                   (c (fx- (fxand s #b11111) 1)))
               (cond ((eqv? s 0)
                      (string-set! str i (string-ref end-alphabet c))
                      str)
                     (else
                      (string-set! str i (string-ref alphabet c))
                      (lp s (fx+ i 1)))))))))
    ((and ($box? v)
          ($box-header-type-eq? ($box-type v) 'symbol))
     (when (and (gensym? v) (not ($box-ref v SYMBOL-NAME)))
       ($gensym-generate-names! v))
     (utf8->string ($box-ref v SYMBOL-NAME)))
    (else
     (assertion-violation 'symbol->string "Expected a symbol" v))))

(define (symbol=? x y . x*)
  (unless (and (symbol? x) (symbol? y))
    (apply assertion-violation 'symbol=? "Expected symbols" x y x*))
  (let lp ((x* x*) (ret (eq? x y)))
    (if (null? x*)
        ret
        (if (symbol? (car x*))
            (lp (cdr x*) (and ret (eq? x (car x*))))
            (apply assertion-violation 'symbol=? "Expected symbols" x y x*)))))

(define *interned* (make-hashtable bytevector-hash bytevector=?))

(define (string->symbol s)
  (define alphabet     "abcdefghijklmnopqrstuvwxyz-/<=>")
  (define end-alphabet "acdefghklmnopqrstvxy!*+-/08<=>?")
  (define (string-index s c)
    (let lp ((i 0))
      (and (not (fx=? i (string-length s)))
           (let ((c* (string-ref s i)))
             (if (eq? c c*)
                 i
                 (lp (fx+ i 1)))))))
  (define (immsym-encode s)
    (let ((len (string-length s)))
      (and (fx<=? 1 len 12)
           (let ((ret (string-index end-alphabet (string-ref s (fx- len 1)))))
             (and ret
                  (let lp ((ret (fx+ ret 1)) (i (fx- len 2)))
                    (if (eqv? i -1)
                        ($fixnum->immsym ret)
                        (let ((j (string-index alphabet (string-ref s i))))
                          (and j
                               (lp (fxior (fxarithmetic-shift-left ret 5)
                                          (fx+ j 1))
                                   (fx- i 1)))))))))))
  (define (find-bootstrap-symbol bv)
    (let ((G/V ($bootstrap-symbols)))
      (let ((G (car G/V)) (V (cdr G/V)))
        (let* ((d (vector-ref G (fxmod (bytevector-hash bv) (vector-length G))))
               (idx (if (negative? d)
                        (fx- -1 d)
                        (fxmod (bytevector-hash bv d)
                               (vector-length V))))
               (sym (vector-ref V idx)))
          (and (bytevector=? ($box-ref sym SYMBOL-NAME) bv)
               sym)))))
  (unless (string? s)
    (assertion-violation 'string->symbol "Expected a string" s))
  (or (immsym-encode s)
      (let ((bv (string->utf8 s)))
        (or
          (find-bootstrap-symbol bv)
          (hashtable-ref *interned* bv #f)
          (let* ((hash (string-hash* s))
                 (sym ($make-box ($make-box-header 'symbol #t #b00 LENGTH-SYMBOL) LENGTH-SYMBOL)))
            ($box-set! sym SYMBOL-NAME bv)
            ($box-set! sym SYMBOL-HASH hash)
            (hashtable-set! *interned* bv sym)
            sym)))))

(define ($symbol-hash s)
  (cond (($immsym? s)
         ($immsym->fixnum s))
        (else
         (when (not ($box-ref s SYMBOL-HASH))
           ($gensym-generate-names! s))
         ($box-ref s SYMBOL-HASH))))

(define (symbol-hash s)
  (unless (symbol? s)
    (assertion-violation 'symbol-hash "Expected a symbol" s))
  ($symbol-hash s))

;;; gensyms
;; TODO: collect all the gensym related stuff here

(define (gensym? x)
  ;; If the size field of the box is not two, then there's a
  ;; unique-string there. And maybe a value? And maybe an index into
  ;; the top level environment?
  (and ($box? x)
       (let ((t ($box-type x)))
         ($box-header-type-eq? t 'symbol #b10 #b10))))

(define (gensym->unique-string v)
  (assert (gensym? v))
  (when (not ($box-ref v SYMBOL-UNIQUE))
    ($gensym-generate-names! v))
  (utf8->string ($box-ref v SYMBOL-UNIQUE)))

(define (gensym-prefix v)
  (assert (gensym? v))
  (when (not ($box-ref v SYMBOL-NAME))
    ($gensym-generate-names! v))
  (utf8->string ($box-ref v SYMBOL-NAME)))

(define gensym
  (case-lambda
    (()
     (gensym *unbound-hack*))
    ((prefix)
     (let ((p (cond ((eq? prefix *unbound-hack*) #f)
                    ((symbol? prefix) (string->utf8 (symbol->string prefix)))
                    ((string? prefix) (string->utf8 prefix))
                    (else
                     (error 'gensym "This procedure needs a string or a symbol"
                            prefix)))))
       (let ((ret ($make-box ($make-box-header 'symbol #t #b10 LENGTH-GENSYM) LENGTH-GENSYM)))
         ($box-set! ret SYMBOL-NAME p)
         ($box-set! ret SYMBOL-HASH #f)   ;generated later
         ($box-set! ret SYMBOL-UNIQUE #f) ;generated later
         ($box-set! ret SYMBOL-VALUE/IDX *unbound-hack*)
         ret)))))

(define (gensym-from-bootstrap? symbol)
  (eqv? 1 (fxand 1 ($box-header-value ($box-type symbol)))))

;; Unique strings for gensyms generated lazily.
(define $gensym-generate-names!
  (let ((g-count -1)
        (id-count -1))            ;TODO: should be some kind of UUID
    (lambda (g)
      (assert (gensym? g))
      (unless ($box-ref g SYMBOL-NAME)
        (set! g-count (+ g-count 1))
        ($box-set! g SYMBOL-NAME (string->utf8 (string-append "g" (number->string g-count)))))
      (unless ($box-ref g SYMBOL-UNIQUE)
        (set! id-count (+ id-count 1))
        (let* ((str (string-append "u" (number->string id-count)))
               (hash* (string-hash* str)))
          ($box-set! g SYMBOL-UNIQUE (string->utf8 str))
          ($box-set! g SYMBOL-HASH hash*)))
      (unless ($box-ref g SYMBOL-HASH)
        (let* ((str (utf8->string ($box-ref g SYMBOL-UNIQUE)))
               (hash* (string-hash* str)))
          ($box-set! g SYMBOL-HASH hash*))))))

(define (set-symbol-value! symbol value)
  (define (gensym-not-from-bootstrap? symbol)
    (and ($box? symbol)
         (let ((t ($box-type symbol)))
           (and (eqv? ($box-header-length t) LENGTH-GENSYM)
                ($box-header-type-eq? t 'symbol #b1 #b0)))))
  (cond ((gensym-not-from-bootstrap? symbol)
         ($box-set! symbol SYMBOL-VALUE/IDX value))
        (else
         (let ((idx ($box-ref symbol SYMBOL-VALUE/IDX)))
           (error 'set-symbol-value! "TODO: Set a bootstrap symbol" symbol value idx)))))

(define (symbol-value symbol)
  (cond ((gensym? symbol)
         (cond ((gensym-from-bootstrap? symbol)
                ;; A bootstrap gensym contains an index into the
                ;; process vector where the symbol's value is
                ;; stored. This is because they are shared between
                ;; all processes.
                (let ((idx ($box-ref symbol SYMBOL-VALUE/IDX)))
                  (if (fixnum? idx)
                      (vector-ref ($processor-data-ref CPU-VECTOR:PROCESS-VECTOR) idx)
                      (raise (condition (make-undefined-violation)
                                        (make-who-condition 'symbol-value)
                                        (make-message-condition "Unbound bootstrap variable")
                                        (make-irritants-condition (list symbol)))))))
               (else
                ;; A non-bootstrap gensym just contains the value.
                (let ((x ($box-ref symbol SYMBOL-VALUE/IDX)))
                  (if (eq? x *unbound-hack*)
                      (raise (condition (make-undefined-violation)
                                        (make-who-condition 'symbol-value)
                                        (make-message-condition "Unbound variable")
                                        (make-irritants-condition (list symbol))))
                      x)))))
        (else
         (error 'symbol-value "Expected a gensym" symbol)))))
