;;; -*- lexical-binding: t -*-
;;
;; this file:
;;   https://dataswamp.org/~incal/emacs-init/b-a-original.el
;;
;; Usage:
;;
;;   Set the region then M-x rank-buffer RET
;;
;; Example output for the `require' lines below:
;;
;;   1. (require 'pcase)      81% 407
;;   2. (require 'cl-lib)     80% 402
;;   3. (require 'psea)       80% 402
;;   4. (require 'subr-x)     80% 402
;;   5. (require 'thingatpt)  77% 387
;;
;;   This means "(require 'pcase)" is the most original
;;   line of those five, it is 81% original at a sum of
;;   407 originality points.
;;
;; Discontinued:
;;
;;   Some of it was brought over to this project,
;;   https://dataswamp.org/~incal/elpa/fun-names.el

(require 'cl-lib)
(require 'pcase)
(require 'psea)
(require 'subr-x)
(require 'thingatpt)

;; all-Emacs stuff begins here

(defun new-rating-system (r rw rb w b &optional float)
  "R is the rating.
\nRW is the rating system worst possible rating, RB is the best.
\nRW and RB are the worst and best ratings possible for the source system.
\nW and B: same as above but for the destination system.
\nSet FLOAT to get a float instead of the default integer.
\nTo go from a say 3.5 rating in the 1-5 system to a 1-30 system, do
\n  (new-rating-system 3.5 1 5 1 30)
\nor
\n  (new-rating-system 3.5 1 5 1 30 t)
\nThe results are: 19 and 19.1"
  (let ((rating (+ w (* (- r rw) (/ (- b w) (- rb rw) 1.0)))))
    (if float
        (format "%.1f" rating)
      (floor rating) )))

;; string to set

(defun string-words (str &optional no-sort keep-case)
  (or keep-case (setq str (downcase str)))
  (let ((words (split-string str "[-[:space:]()]+" t "[[:punct:]]+")))
    (if no-sort
        words
      (sort words) )))

;; buffer to set

(defun strings-tidy (strs)
  (remove "" (cl-remove-duplicates (sort strs) :test #'string=)) )

(defun buffer-lines (&optional beg end)
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (let ((strs (split-string (buffer-substring-no-properties beg end) "\n")))
    (strings-tidy strs) ))

(defun buffer-sentences (&optional beg end)
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (save-mark-and-excursion
    (goto-char beg)
    (cl-loop
      with buf-strs
      with next-str
      while (< (point) end) do
        (setq next-str (sentence-at-point t))
        (when next-str
          (push next-str buf-strs) )
        (forward-sentence)
      finally return (strings-tidy buf-strs) )))

;; all-Emacs stuff ends here

;; interface

(defun prepare-string (str &optional col)
  (or col (setq col fill-column))
  (truncate-string-to-width
    (string-trim (replace-regexp-in-string "[\n\t[:space:]]+" " " str))
    col nil nil "...") )

(defun original-buffer ()
  (get-buffer-create "*original*") )

(defun print-results (res)
  (let ((buf-dst (original-buffer)))
    (with-current-buffer buf-dst
      (let ((str     (nth 0 res))
            (min     (nth 1 res))
            (min-str (nth 2 res))
            (max     (nth 3 res))
            (max-str (nth 4 res))
            (avg     (nth 5 res))
            (sum     (nth 6 res)) )
        (erase-buffer)
        (insert "TOFT - originality finder tool\n\n")
        (insert (prepare-string (format "string: \"%s\"" str)) "\n\n")
        (insert (prepare-string (format "min: %d%% - %s" min min-str)) "\n")
        (insert (prepare-string (format "max: %d%% - %s" max max-str)) "\n")
        (insert (format "avg: %d%%\n" avg))
        (insert (format "sum: %d\n"   sum))
        (goto-char (point-min)) )
    (pop-to-buffer buf-dst) )))

(defun string-compare-buffer (str &optional beg end)
  (interactive
    (list
      (read-string "string: ")
      (when (use-region-p)
        (list (region-beginning) (region-end)) )))
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (print-results
    (string-compare-many str (buffer-sentences beg end)) ))

;; main test

(defun string-compare-many (str str-all)
   (cl-loop
     with min
     with max
     with min-str
     with max-str
     with sum = 0
     with mean
     with s-score
     for s in str-all do
       (setq s-score (string-compare str s))
       (cl-incf sum s-score)
       (when (or (not min) (< s-score min))
         (setq min s-score)
         (setq min-str s) )
       (when (or (not max) (< max s-score))
         (setq max s-score)
         (setq max-str s) )
     finally (setq mean (floor (/ sum (length str-all) 1.0)))
     finally return (list str min min-str max max-str mean sum) ))

(defun do-rank (strs &optional comps)
  (or comps (setq comps strs))
  (cl-loop
    for s in strs
    collect (string-compare-many s comps) into rank
    finally return (cl-sort rank #'> :key (lambda (l) (car (last l)))) ))

(defun do-rank-print (rank)
  (let ((buf-dst (original-buffer)))
    (with-current-buffer buf-dst
      (erase-buffer)
      (cl-loop
        with str
        with mean
        with score
        for r in rank
        for pos from 1 do
        (setq str   (string-pad (prepare-string (car r)) fill-column))
        (setq mean  (car (last (butlast r))))
        (setq score (car (last r)))
        (insert (format "%2d. %s %d%% %d\n" pos str mean score)) ))
    (pop-to-buffer buf-dst) ))

(defun rank-buffer (&optional beg end)
  (interactive "r")
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (do-rank-print (do-rank (buffer-lines beg end))) )

;; apply all tests to two strings

(defun string-compare (s1 s2 &optional full)
  (let ((tests (list
                 #'length-test
                 #'number-of-words-test
                 #'same-words-test
                 #'string-distance-test
                 )))
    (cl-loop
      with res
      with score = 0
      with f-score
      for f in tests do
        (setq f-score (apply f (list s1 s2)))
        (push f-score res)
        (cl-incf score f-score)
      finally (setq score (floor (/ score (length tests) 1.0)))
      finally return (if full res score) )))

;; individual tests, normalize score

(defun originality-score (a b)
  (pcase-let*((`(,n ,d) (if (< a b) `(,a ,b) `(,b ,a))))
    (new-rating-system (/ n d 1.0) 0 1 1 100) ))

;; (originality-score 0 100) ; 100
;; (originality-score 1   1) ;   1

;; individual tests

(defun length-test (s1 s2)
  (let ((l1 (length s1))
        (l2 (length s2)) )
    (originality-score l1 l2) ))

;; (length-test "abc" "abcdef") ; 50
;; (length-test "abc" "bcd")    ;  1

(defun number-of-words-test (s1 s2)
  (let ((nw1 (length (string-split s1)))
        (nw2 (length (string-split s2))) )
    (originality-score nw1 nw2) ))

;; (number-of-words-test "my oh my" "George Walker Bush") ; 100
;; (number-of-words-test "none"     "a b c d e f g h i")  ;  12

(defun same-words-test (s1 s2)
  (let*((w1 (string-words s1))
        (w2 (string-words s2))
        (num    (+ (length w1) (length w2)))
        (common (- num (length (cl-set-exclusive-or w1 w2 :test #'string=)))) )
    (originality-score common num) ))

;; (same-words-test "d"     "c")   ; 100
;; (same-words-test "a b c" "a d") ;  60
;; (same-words-test "a b"   "a d") ;  50
;; (same-words-test "a"     "a")   ;   1

(defun string-distance-test (s1 s2)
  (originality-score (string-distance-percentage s1 s2) 100) )

;; (string-distance-test "ooo" "aaa") ; 100
;; (string-distance-test "iii" "iii") ;   1

(provide 'b-a-original)