;;; -*- lexical-binding: t -*- ;; ;; this file: ;; https://dataswamp.org/~incal/emacs-init/b-a-original.el ;; ;; Usage: ;; ;; Set the region then M-x rank-buffer RET ;; ;; Example output for the `require' lines below: ;; ;; 1. (require 'pcase) 81% 407 ;; 2. (require 'cl-lib) 80% 402 ;; 3. (require 'psea) 80% 402 ;; 4. (require 'subr-x) 80% 402 ;; 5. (require 'thingatpt) 77% 387 ;; ;; This means "(require 'pcase)" is the most original ;; line of those five, it is 81% original at a sum of ;; 407 originality points. ;; ;; Discontinued: ;; ;; Some of it was brought over to this project, ;; https://dataswamp.org/~incal/elpa/fun-names.el (require 'cl-lib) (require 'pcase) (require 'psea) (require 'subr-x) (require 'thingatpt) ;; all-Emacs stuff begins here (defun new-rating-system (r rw rb w b &optional float) "R is the rating. \nRW is the rating system worst possible rating, RB is the best. \nRW and RB are the worst and best ratings possible for the source system. \nW and B: same as above but for the destination system. \nSet FLOAT to get a float instead of the default integer. \nTo go from a say 3.5 rating in the 1-5 system to a 1-30 system, do \n (new-rating-system 3.5 1 5 1 30) \nor \n (new-rating-system 3.5 1 5 1 30 t) \nThe results are: 19 and 19.1" (let ((rating (+ w (* (- r rw) (/ (- b w) (- rb rw) 1.0))))) (if float (format "%.1f" rating) (floor rating) ))) ;; string to set (defun string-words (str &optional no-sort keep-case) (or keep-case (setq str (downcase str))) (let ((words (split-string str "[-[:space:]()]+" t "[[:punct:]]+"))) (if no-sort words (sort words) ))) ;; buffer to set (defun strings-tidy (strs) (remove "" (cl-remove-duplicates (sort strs) :test #'string=)) ) (defun buffer-lines (&optional beg end) (or beg (setq beg (point-min))) (or end (setq end (point-max))) (let ((strs (split-string (buffer-substring-no-properties beg end) "\n"))) (strings-tidy strs) )) (defun buffer-sentences (&optional beg end) (or beg (setq beg (point-min))) (or end (setq end (point-max))) (save-mark-and-excursion (goto-char beg) (cl-loop with buf-strs with next-str while (< (point) end) do (setq next-str (sentence-at-point t)) (when next-str (push next-str buf-strs) ) (forward-sentence) finally return (strings-tidy buf-strs) ))) ;; all-Emacs stuff ends here ;; interface (defun prepare-string (str &optional col) (or col (setq col fill-column)) (truncate-string-to-width (string-trim (replace-regexp-in-string "[\n\t[:space:]]+" " " str)) col nil nil "...") ) (defun original-buffer () (get-buffer-create "*original*") ) (defun print-results (res) (let ((buf-dst (original-buffer))) (with-current-buffer buf-dst (let ((str (nth 0 res)) (min (nth 1 res)) (min-str (nth 2 res)) (max (nth 3 res)) (max-str (nth 4 res)) (avg (nth 5 res)) (sum (nth 6 res)) ) (erase-buffer) (insert "TOFT - originality finder tool\n\n") (insert (prepare-string (format "string: \"%s\"" str)) "\n\n") (insert (prepare-string (format "min: %d%% - %s" min min-str)) "\n") (insert (prepare-string (format "max: %d%% - %s" max max-str)) "\n") (insert (format "avg: %d%%\n" avg)) (insert (format "sum: %d\n" sum)) (goto-char (point-min)) ) (pop-to-buffer buf-dst) ))) (defun string-compare-buffer (str &optional beg end) (interactive (list (read-string "string: ") (when (use-region-p) (list (region-beginning) (region-end)) ))) (or beg (setq beg (point-min))) (or end (setq end (point-max))) (print-results (string-compare-many str (buffer-sentences beg end)) )) ;; main test (defun string-compare-many (str str-all) (cl-loop with min with max with min-str with max-str with sum = 0 with mean with s-score for s in str-all do (setq s-score (string-compare str s)) (cl-incf sum s-score) (when (or (not min) (< s-score min)) (setq min s-score) (setq min-str s) ) (when (or (not max) (< max s-score)) (setq max s-score) (setq max-str s) ) finally (setq mean (floor (/ sum (length str-all) 1.0))) finally return (list str min min-str max max-str mean sum) )) (defun do-rank (strs &optional comps) (or comps (setq comps strs)) (cl-loop for s in strs collect (string-compare-many s comps) into rank finally return (cl-sort rank #'> :key (lambda (l) (car (last l)))) )) (defun do-rank-print (rank) (let ((buf-dst (original-buffer))) (with-current-buffer buf-dst (erase-buffer) (cl-loop with str with mean with score for r in rank for pos from 1 do (setq str (string-pad (prepare-string (car r)) fill-column)) (setq mean (car (last (butlast r)))) (setq score (car (last r))) (insert (format "%2d. %s %d%% %d\n" pos str mean score)) )) (pop-to-buffer buf-dst) )) (defun rank-buffer (&optional beg end) (interactive "r") (or beg (setq beg (point-min))) (or end (setq end (point-max))) (do-rank-print (do-rank (buffer-lines beg end))) ) ;; apply all tests to two strings (defun string-compare (s1 s2 &optional full) (let ((tests (list #'length-test #'number-of-words-test #'same-words-test #'string-distance-test ))) (cl-loop with res with score = 0 with f-score for f in tests do (setq f-score (apply f (list s1 s2))) (push f-score res) (cl-incf score f-score) finally (setq score (floor (/ score (length tests) 1.0))) finally return (if full res score) ))) ;; individual tests, normalize score (defun originality-score (a b) (pcase-let*((`(,n ,d) (if (< a b) `(,a ,b) `(,b ,a)))) (new-rating-system (/ n d 1.0) 0 1 1 100) )) ;; (originality-score 0 100) ; 100 ;; (originality-score 1 1) ; 1 ;; individual tests (defun length-test (s1 s2) (let ((l1 (length s1)) (l2 (length s2)) ) (originality-score l1 l2) )) ;; (length-test "abc" "abcdef") ; 50 ;; (length-test "abc" "bcd") ; 1 (defun number-of-words-test (s1 s2) (let ((nw1 (length (string-split s1))) (nw2 (length (string-split s2))) ) (originality-score nw1 nw2) )) ;; (number-of-words-test "my oh my" "George Walker Bush") ; 100 ;; (number-of-words-test "none" "a b c d e f g h i") ; 12 (defun same-words-test (s1 s2) (let*((w1 (string-words s1)) (w2 (string-words s2)) (num (+ (length w1) (length w2))) (common (- num (length (cl-set-exclusive-or w1 w2 :test #'string=)))) ) (originality-score common num) )) ;; (same-words-test "d" "c") ; 100 ;; (same-words-test "a b c" "a d") ; 60 ;; (same-words-test "a b" "a d") ; 50 ;; (same-words-test "a" "a") ; 1 (defun string-distance-test (s1 s2) (originality-score (string-distance-percentage s1 s2) 100) ) ;; (string-distance-test "ooo" "aaa") ; 100 ;; (string-distance-test "iii" "iii") ; 1 (provide 'b-a-original)