diff options
| author | David Aspinall | 2008-01-25 18:29:44 +0000 |
|---|---|---|
| committer | David Aspinall | 2008-01-25 18:29:44 +0000 |
| commit | 230ef6d2ca8c538a471bb80ec2899559c61600bd (patch) | |
| tree | 4ba871bf361bc58c9706a1160db4275c4a5f903d | |
| parent | 5b8d8e42498b0f1141ea5f39c8490567d8f8fa30 (diff) | |
New files.
| -rw-r--r-- | lib/unicode-tokens.el | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/lib/unicode-tokens.el b/lib/unicode-tokens.el new file mode 100644 index 00000000..61308943 --- /dev/null +++ b/lib/unicode-tokens.el @@ -0,0 +1,351 @@ +;;; unicode-tokens.el --- Support for editing tokens for Unicode characters +;; +;; Copyright(C) 2008 David Aspinall / LFCS Edinburgh +;; Author: David Aspinall <David.Aspinall@ed.ac.uk> +;; +;; This is a partial replacement for X-Symbol for Proof General. +;; +;; Some functions are taken from `xmlunicode.el' by Norman Walsh. +;; Created: 2004-07-21, Version: 1.6, Copyright (C) 2003 Norman Walsh +;; Inspired in part by sgml-input, Copyright (C) 2001 Dave Love +;; Inspired in part by +;; http://www.tbray.org/ongoing/When/200x/2003/09/27/UniEmacs +;; +;; +;; This is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This software is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; +;; Functions to help insert tokens that represent Unicode characters. +;; Tokens are used for programs that do not understand a Unicode encoding. +;; +;; TODO: +;; -- add coding/decoding for saving/reading files + + +(require 'cl) + +(require 'unicode-chars) ; list of Unicode characters + +;; +;; Variables that should be set +;; (default settings are for XML, but configuration incomplete; +;; use xmlunicode.el instead) +;; + +(defvar unicode-tokens-charref-format "&#x%x;" + "The format for numeric character references") + +(defvar unicode-tokens-token-format "&%x;" + "The format for token character references") + +(defvar unicode-token-name-alist nil + "Mapping of token names to Unicode character names.") + +(defvar unicode-tokens-glyph-list nil + "List of available glyphs, as characters. +If not set, constructed to include glyphs for all tokens. ") + +(defvar unicode-tokens-token-prefix nil + "Prefix for start of tokens to insert.") + +(defvar unicode-tokens-token-suffix nil + "Suffix for end of tokens to insert.") + +(defvar unicode-tokens-token-match nil + "Regexp matching tokens") + +(defvar unicode-tokens-hexcode-match nil + "Regexp matching numeric token string") + + + +;; +;; Variables initialised in unicode-tokens-initialise +;; + +(defvar unicode-token-name-codepoint-alist nil + "Mapping of token names to Unicode codepoints.") + +(defvar unicode-tokens-max-token-length 10 + "Maximum length of a token in underlying encoding.") + + +;; +;;; Code: +;; + +(defun unicode-tokens-insert-char (arg codepoint) + "Insert the Unicode character identified by CODEPOINT. +If ARG is non-nil, ignore available glyphs." + (let ((glyph (memq codepoint unicode-tokens-glyph-list))) + (cond + ((and (decode-char 'ucs codepoint) (or arg glyph)) + (ucs-insert codepoint)) ;; glyph converted to token on save + (t + (insert (format unicode-tokens-charref-format codepoint)))))) + +(defun unicode-tokens-character-insert (arg &optional argname) + "Insert a Unicode character by character name, with completion. +If a prefix is given, the character will be inserted regardless +of whether or not it has a displayable glyph; otherwise, a +numeric character reference is inserted if the codepoint is not +in the `unicode-tokens-glyph-list'. If argname is given, it is used for +the prompt. If argname uniquely identifies a character, that +character is inserted without the prompt." + (interactive "P") + (let* ((completion-ignore-case t) + (uniname (if (stringp argname) argname "")) + (charname + (if (eq (try-completion uniname unicode-chars-alist) t) + uniname + (completing-read + "Unicode name: " + unicode-chars-alist + nil t uniname))) + codepoint glyph) + (setq codepoint (cdr (assoc charname unicode-chars-alist))) + (unicode-tokens-insert-char arg codepoint))) + +(defun unicode-tokens-token-insert (arg &optional argname) + "Insert a Unicode character by a token name, with completion. +If a prefix is given, the character will be inserted regardless +of whether or not it has a displayable glyph; otherwise, a +numeric character reference is inserted if the codepoint is not +in the unicode-tokens-glyph-list. If argname is given, it is used for +the prompt. If argname uniquely identifies a character, that +character is inserted without the prompt." + (interactive "P") + (let* ((stokname (if (stringp argname) argname "")) + (tokname + (if (eq (try-completion stokname unicode-token-name-alist) t) + stokname + (completing-read + "Token name: " + unicode-token-alist + nil t stokname))) + charname codepoint glyph) + (setq charname (cdr (assoc tokame unicode-token-name-alist))) + (setq codepoint (cdr-safe (assoc charname unicode-chars-alist))) + (unicode-tokens-insert-char arg codepoint))) + + +(defun unicode-tokens-replace-token-after (length) + (let ((bpoint (point))) + (save-excursion + (forward-char length) + (save-match-data + (while (re-search-backward + unicode-token-match + (max (- bpoint unicode-tokens-max-token-length) + (point-min)) t nil) + (setq codept + (assoc (match-string 1) unicode-tokens-codept-alist)) + (if (and codept + (memq (cadr codept) unicode-tokens-glyph-list)) + (progn + (replace-match (format "%c" (decode-char 'ucs (cadr codept)))) + ;; FIXME: return correct length + ;; something like 1-length-point modulo chars replaced + (setq length length))))))) + length) + + +;;stolen from hen.el which in turn claims to have stolen it from cxref +(defun unicode-tokens-looking-backward-at (regexp) + "Return t if text before point matches regular expression REGEXP. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them." + (save-excursion + (let ((here (point))) + (if (re-search-backward regexp (point-min) t) + (if (re-search-forward regexp here t) + (= (point) here)))))) + +(defun unicode-tokens-smart-suffix () + "Detect tokens and replace them with the appropriate char. +This can be bound to the character ending `unicode-tokens-token-suffix' +if there is such a unique character." + (interactive) + (let ((pos (point)) + amppos codept) + (search-backward unicode-tokens-token-prefix nil t nil) + (setq amppos (point)) + (goto-char pos) + (cond + ((unicode-tokens-looking-backward-at unicode-tokens-token-match) + (progn + (re-search-backward unicode-tokens-token-match nil t nil) + (if (= amppos (point)) + (progn + (setq codept + (assoc (match-string 1) + unicode-tokens-token-codept-alist)) + (if (and codept + (memq (cdr codept) unicode-tokens-glyph-list)) + (replace-match (format "%c" (decode-char 'ucs (cdr codept)))) + (progn + (goto-char pos) + (insert unicode-tokens-token-suffix)))) + (progn + (goto-char pos) + (insert unicode-tokens-token-suffix))))) + ((unicode-tokens-looking-backward-at unicode-tokens-hexcode-match) + (progn + (re-search-backward unicode-tokens-hexcode-match nil t nil) + (if (= amppos (point)) + (progn + (setq codept (string-to-number (match-string 1) 16)) + (if (memq codept unicode-tokens-glyph-list) + (replace-match (format "%c" (decode-char 'ucs (cdr codept)))) + (progn + (goto-char pos) + (insert unicode-tokens-token-suffix)))) + (progn + (goto-char pos) + (insert unicode-tokens-token-suffix))))) + (t + (insert unicode-tokens-token-suffix))))) + +;; +;; Coding system for saving tokens in plain ASCII. +;; + +(make-coding-system 'unicode-tokens-coding-system + 4 ; ccl + "Unicode token coding system" + (cons 'unicode-tokens-ccl-decode + 'unicode-tokens-ccl-encode)) + + +;; Emacs-Unicode/23: +;;(define-coding-system 'unicode-tokens-coding-system +;; "Unicode token coding system" +;; :mnemonic ?T +;; :coding-type 'raw-text ; could be 'undecided/ccl +;; ;; omit for automatic guess :eol-type +;; ;; :charset-list iso-20222 +;; :ascii-compatible-p t +;; :decode-translation-table +;; :encode-translation-table ) + + + +;; +;; Setup quail for Unicode tokens mode +;; + +(require 'quail) + +(quail-define-package + "Unicode tokens" "UTF-8" "T" t + "Unicode characters input method using application specific token names" + nil t nil nil nil t nil nil nil nil t) + +(defun unicode-tokens-quail-define-rules () + "Define the token input rules. +Calculated from `unicode-tokens-token-name-alist' and `unicode-tokens-glyph-list'." + (let ((ulist unicode-tokens-token-name-alist) + codepoint glyph tokname token + unicode-tokens-quail-define-rules) + (while ulist + (setq tokname (caar ulist)) + (setq charname (cdar ulist)) + (setq codepoint (cdr (assoc charname unicode-chars-alist))) + (unless codepoint + (error "Unicode character %s not found" charname)) + (setq glyph (memq codepoint unicode-tokens-glyph-list)) + (setq token (format unicode-tokens-token-format tokname)) + (cond + ((and glyph (decode-char 'ucs codepoint)) + (nconc unicode-tokens-quail-define-rules + (list (list token (decode-char 'ucs codepoint)))))) +; (t ; still use token if no glyph +; (nconc unicode-tokens-quail-define-rules +; (list (list token (vector token)))))) + (setq ulist (cdr ulist))) + (eval unicode-tokens-quail-define-rules))) + + + + +;; +;; Minor mode +;; + +(defvar unicode-tokens-mode-map (make-sparse-keymap) + "Key map used for Unicode Tokens mode.") + +(define-minor-mode unicode-tokens-mode + "Minor mode for unicode token input." + nil + nil ; input method indication already + unicode-tokens-mode-map + (when unicode-tokens-mode + (set-buffer-multibyte t) + (decode-coding-region (point-min) (point-max) + 'unicode-tokens-coding-system)) + (unless unicode-tokens-mode + (set-buffer-multibyte nil) + (encode-coding-region (point-min) (point-max) + 'unicode-tokens-coding-system)) + ;(toggle-enable-multibyte-characters unicode-tokens-mode) + (set-input-method "Unicode tokens" unicode-tokens-mode) + ) + +;; +;; Initialisation +;; + +(defun unicode-tokens-initialise () + "Initialise tables." + ;; Calculate max token length + (let ((tlist unicode-token-name-alist) + (len 0) tok) + (while tlist + (when (> (length (car tlist)) 0) + (setq len (length (car tlist))) + (setq tok (car tlist))) + (setq tlist (cdr tlist))) + (setq unicode-tokens-max-token-length + (length (format unicode-tokens-token-format tok)))) + ;; Default assumed available glyph list based on tokens; + ;; TODO: filter with what's really available, if can find out. + (setq unicode-tokens-token-codept-alist + (mapcar (lambda (uchar) + (let ((codept (assoc (cdr uchar) unicode-chars-alist))) + (unless codept + (error "Unicode character %s unknown" uchar)) + (cons (car uchar) (cdr codept)))) + unicode-tokens-token-name-alist)) + (unless unicode-tokens-glyph-list + (setq unicode-tokens-glyph-list + (mapcar 'cdr unicode-tokens-token-codept-alist))) + (unicode-tokens-quail-define-rules) + ;; Keys + (if (= (length unicode-tokens-token-suffix) 1) + (define-key unicode-tokens-mode-map + (vector (string-to-char unicode-tokens-token-suffix)) + 'unicode-tokens-smart-suffix)) + ;; otherwise action on space like in X-Symbol? + ) + + +(provide 'unicode-tokens) + +;;; unicode-tokens.el ends here |
