Ключевые слова:rus, xemacs, encoding, example, (найти похожие документы)
_ RU.UNIX (2:5077/15.22) ____________________________________________ RU.UNIX _
From : Boris Tobotras 2:5020/510 19 Dec 99 13:24:08
Subj : xemacs и КОИ-8 буквы не входящие в 8859-1
_______________________________________________________________________________
>>>>> "Serge" == Serge Matveev writes:
Serge> Да, если кто не понял, речь идет о русской "в". Она же, похоже,
Serge> влияет и на удаление предложений (M-k) - не всегда то, что я
Serge> хочу. Очень обломно, однако :-((
Да понял кто, понял. Русская "в" относится к числу немногих букв,
не являющихся буквами 8859-1 в кодировке КОИ-8 ;)
Надо вправить emacs'у syntax table, делов-то. Во. Нашел. Авторство
-- Ильи Перминова.
rus-syntax.el:
;;;; Russian syntax and case tables.
(require 'cl)
(require 'rus-encodings)
(defun rus-copy-case-table (case-table)
  "Return a copy of CASE-TABLE.
A list-shaped CASE-TABLE (old Emacses and XEmacs) yields a fresh
four-element list holding a copy of its first element followed by
three nils.  Any other value is handed to `copy-case-table'."
  (cond
   ((not (listp case-table))
    (copy-case-table case-table))
   (t
    ;; Old Emacses and XEmacs: only the first slot is copied, the
    ;; remaining three slots are left nil.
    (let ((down (copy-sequence (car case-table))))
      (list down nil nil nil)))))
(defun case-table-aset (ct x y)
  "Store Y at index X of the case table CT and return Y.
When CT is a list, the array being modified is its first element;
otherwise CT itself is modified."
  (aset (if (listp ct) (car ct) ct) x y))
(defun rus-syntax-table ()
  "Set syntax and case tables for the current buffer.
The tables are derived from the encoding of russian letters named by
the variable `rus-buffer-encoding'.  Every letter of the encoding
becomes a word constituent, and each capital letter is mapped to its
small counterpart in the case table.
Signal an error when `rus-buffer-encoding' names no known encoding."
  (let ((e (rus-encoding rus-buffer-encoding)))
    ;; Without this check an unset or misspelled encoding name surfaces
    ;; as an obscure `wrong-type-argument' from `substring' below.
    (if (null e)
        (error "Unknown encoding of russian letters: %s"
               rus-buffer-encoding))
    (let* ((half (/ (length e) 2))
           ;; An encoding definition is the small letters followed by
           ;; the capital letters, both in alphabet order.
           (lc-chars (substring e 0 half))
           (uc-chars (substring e half))
           (ct (rus-copy-case-table (current-case-table)))
           (st (copy-syntax-table (syntax-table))))
      ;; Called for side effect only, hence `mapc' and not `mapcar'.
      (mapc (function (lambda (x) (modify-syntax-entry x "w" st))) e)
      ;; Small letters map to themselves ...
      (mapcar* (function (lambda (x y) (case-table-aset ct x y)))
               lc-chars lc-chars)
      ;; ... and capital letters map to the matching small letter.
      (mapcar* (function (lambda (x y) (case-table-aset ct x y)))
               uc-chars lc-chars)
      (set-syntax-table st)
      (set-case-table ct))))
;; Rebuild the syntax and case tables whenever
;; `rus-set-buffer-encoding-hook' is run.
(add-hook 'rus-set-buffer-encoding-hook 'rus-syntax-table)
(provide 'rus-syntax)
rus-encodings.el
;;;; Various encodings of russian letters.
;;;; Each encoding definition is a sequence of codes(numbers) of
;;;; small letters in alphabet order and then capital letters
;;;; in alphabet order.
;; Encoding registered under the name "alt" in `rus-encodings-alist'.
;; First four strings: small letters; last four: capital letters.
;; The codes \361 and \360 are spliced in at the seventh position of each
;; half, out of numeric order.
;; NOTE(review): the values look like code page 866 -- confirm.
(defconst rus-encoding-alt
(concat
"\240\241\242\243\244\245\361\246\247"
"\250\251\252\253\254\255\256\257"
"\340\341\342\343\344\345\346\347"
"\350\351\352\353\354\355\356\357"
"\200\201\202\203\204\205\360\206\207"
"\210\211\212\213\214\215\216\217"
"\220\221\222\223\224\225\226\227"
"\230\231\232\233\234\235\236\237"))
;; Encoding registered under the name "koi8" in `rus-encodings-alist'.
;; First four strings: small letters; last four: capital letters.
;; The codes are not in numeric order because the strings follow
;; alphabet order, not code order.
;; NOTE(review): the values look like KOI8-R -- confirm.
(defconst rus-encoding-8koi
(concat
"\301\302\327\307\304\305\243\326\332"
"\311\312\313\314\315\316\317\320"
"\322\323\324\325\306\310\303\336"
"\333\335\337\331\330\334\300\321"
"\341\342\367\347\344\345\263\366\372"
"\351\352\353\354\355\356\357\360"
"\362\363\364\365\346\350\343\376"
"\373\375\377\371\370\374\340\361"))
;; Encoding registered under the name "cp1251" in `rus-encodings-alist'.
;; First four strings: small letters; last four: capital letters.
;; The codes \270 and \250 are spliced in at the seventh position of each
;; half, out of numeric order.
(defconst rus-encoding-cp1251
(concat
"\340\341\342\343\344\345\270\346\347"
"\350\351\352\353\354\355\356\357"
"\360\361\362\363\364\365\366\367"
"\370\371\372\373\374\375\376\377"
"\300\301\302\303\304\305\250\306\307"
"\310\311\312\313\314\315\316\317"
"\320\321\322\323\324\325\326\327"
"\330\331\332\333\334\335\336\337"))
;; 7-bit definition: first string for small letters, second for capitals.
;; It is not listed in `rus-encodings-alist'.
;; NOTE(review): presumably the Latin keys found at the positions of the
;; russian letters on a JCUKEN keyboard layout -- confirm.
(defconst rus-encoding-jcuken
(concat
"f,dult/;pbqrkvyjghcnea[wxio]sm'.z"
"F<DULT?:PBQRKVYJGHCNEA{WXIO}SM\">Z"))
;; 7-bit definition: first string for small letters, second for capitals.
;; It is not listed in `rus-encodings-alist'.
;; NOTE(review): presumably an ASCII transliteration of the russian
;; alphabet -- confirm.
(defconst rus-encoding-ascii
(concat
"abwgde^vzijklmnoprstufhc=[]#yx\\`q"
"ABWGDE&VZIJKLMNOPRSTUFHC+{}$YX|~Q"))
;;; Alist mapping encoding names to their definitions.
;;; Each entry is (NAME . SYMBOL).  `rus-encoding' evaluates SYMBOL to
;;; obtain the definition string, and `rus-set-buffer-encoding' uses the
;;; alist as its completion table.
(defvar rus-encodings-alist '(
("koi8" . rus-encoding-8koi)
("cp1251" . rus-encoding-cp1251)
("alt" . rus-encoding-alt))
"Alist mapping encoding names to their definitions.")
(defun rus-encoding (name)
  "Return the definition of the encoding NAME of russian letters.
NAME is looked up in `rus-encodings-alist' and the value stored with
it is evaluated.  The result is nil when NAME is not in the alist."
  (let* ((entry (assoc name rus-encodings-alist))
         (definition (cdr entry)))
    (eval definition)))
;; Holds the encoding name that `rus-syntax-table' passes to
;; `rus-encoding'; nil until an encoding has been set.
(defvar rus-buffer-encoding nil
"Encoding of russian chars in the current buffer.")
;; Setting the variable in a buffer makes it local to that buffer.
(make-variable-buffer-local 'rus-buffer-encoding)
(defvar rus-set-buffer-encoding-hook nil
"Hook run by `rus-set-buffer-encoding' after the encoding of russian
letters in the current buffer has been set.")
(defun rus-set-buffer-encoding (&optional encoding)
  "Set `rus-buffer-encoding' for the current buffer.
A non-nil ENCODING is stored as given.  Otherwise the user is asked
for one of the names in `rus-encodings-alist'; an empty answer selects
the guessed encoding, or \"koi8\" when no guess is available.
`rus-set-buffer-encoding-hook' is run afterwards in either case."
  (interactive)
  (cond
   (encoding
    (setq rus-buffer-encoding encoding))
   (t
    (let* ((fallback (or (rus-guess-buffer-encoding) "koi8"))
           (prompt (concat "Buffer encoding (default " fallback "): "))
           (answer (completing-read prompt rus-encodings-alist nil t)))
      ;; The prompt only advertises the default; an empty answer has to
      ;; be mapped to it by hand.
      (setq rus-buffer-encoding
            (if (equal answer "") fallback answer)))))
  (run-hooks 'rus-set-buffer-encoding-hook))
(defun rus-auto-set-buffer-encoding (&optional ask)
  "Set the encoding of russian letters in the current buffer.
With ASK nil, the result of `rus-guess-buffer-encoding' is passed to
`rus-set-buffer-encoding'.  With ASK non-nil (a prefix argument when
called interactively), nil is passed instead, so that
`rus-set-buffer-encoding' prompts for the encoding."
  (interactive "P")
  (let ((guess (if ask
                   nil
                 (rus-guess-buffer-encoding))))
    (rus-set-buffer-encoding guess)))
(require 'cl)
;; Each element is (FIRST . SECOND): zero-based positions of two adjacent
;; letters inside an encoding definition string.  All positions used here
;; fall in the small-letter half of a definition.  For example, with the
;; alphabet order used by the definitions, (18 . 19) is the digraph
;; "s" followed by "t".
(defconst frequent_pairs '(
(18 . 19)
(19 . 15)
(17 . 0)
(19 . 5)
(16 . 15)
( 5 . 19)
( 2 . 0)
(15 . 2)
(17 . 15)
(15 . 12)
( 9 . 18)
(14 . 15)
(11 . 15)
( 5 . 17)
(16 . 17)
(14 . 0)
(14 . 9)
( 5 . 14)
(19 . 29)
( 0 . 19))
"Pairs of russian letters that occur most frequently in russian texts.")
(defconst recognition-level 10
"Recognition threshold, in percent.
A text is recognized as russian (in the corresponding encoding) when the
occurrences of pairs from FREQUENT_PAIRS exceed this percentage of the
number of 8-bit characters scanned.")
(defvar max-length-of-text-to-analyze 5000
"Maximum number of characters, counted from the beginning of the
accessible portion of the buffer, that RUS-GUESS-BUFFER-ENCODING analyzes.")
(defun rus-guess-buffer-encoding ()
  "Analyze the current buffer and guess the encoding of its russian text.
Return the name of the first encoding in `rus-encodings-alist' for which
the occurrences of the letter pairs in `frequent_pairs' exceed
`recognition-level' percent of the 8-bit characters scanned.  Return nil
when no encoding qualifies.  At most `max-length-of-text-to-analyze'
characters from the start of the accessible portion are examined."
  (let ((lim (min (point-max)
                  (+ (point-min) max-length-of-text-to-analyze)))
        ;; FREQS is a 128x128 table: (aref (aref freqs A) B) counts how
        ;; often the character with code 128+A is immediately followed
        ;; by the character with code 128+B.
        (freqs (make-vector 128 nil))
        (count 0)                       ; number of 8-bit characters seen
        (prev -1)                       ; previous character minus 128
        (i 0)
        c)
    ;; Make empty table.
    (while (< i 128)
      (aset freqs i (make-vector 128 0))
      (setq i (1+ i)))
    ;; Scan current buffer, calculate frequencies of char pairs
    ;; and store them to the table.
    (setq i (point-min))
    (while (< i lim)
      (setq c (- (char-after i) 128))
      (if (and (>= c 0) (<= c 127))
          (progn
            (setq count (1+ count))
            (if (and (>= prev 0) (<= prev 127))
                (aset (aref freqs prev) c
                      (1+ (aref (aref freqs prev) c))))))
      ;; PREV is updated for every character, so only directly adjacent
      ;; 8-bit characters are counted as a pair.
      (setq prev c)
      (setq i (1+ i)))
    ;; Detect encoding.
    (some (function
           (lambda (ename)
             (let* ((e (rus-encoding ename))
                    (sum (reduce
                          (function
                           (lambda (s p)
                             (let ((a (- (aref e (car p)) 128))
                                   (b (- (aref e (cdr p)) 128)))
                               ;; A definition made of 7-bit codes gives
                               ;; negative indices; such pairs can never
                               ;; be in the table, so they add nothing
                               ;; instead of signalling an error.
                               (if (and (>= a 0) (<= a 127)
                                        (>= b 0) (<= b 127))
                                   (+ s (aref (aref freqs a) b))
                                 s))))
                          frequent_pairs :initial-value 0)))
               ;; SUM > 0 implies COUNT > 0, so the division is safe.
               (if (and (> sum 0)
                        (> (/ (* sum 100) count) recognition-level))
                   ename
                 nil))))
          (mapcar 'car rus-encodings-alist))))
(provide 'rus-encodings)
--
Best regards, -- Boris.
Some people are only alive because it is illegal to kill them.
--- Gnus v5.5/XEmacs 20.3 - "London" * Origin: Linux inside (2:5020/510@fidonet)