diff options
| author | David Aspinall | 2008-01-25 13:58:27 +0000 |
|---|---|---|
| committer | David Aspinall | 2008-01-25 13:58:27 +0000 |
| commit | 7fab2fcb320d72fd528b365c4f52dac0fbd4594b (patch) | |
| tree | 31481b61ef5ce289f0c6a503b30b02164be25bf4 /lib/xmlunicode.el | |
| parent | e8758ba9d1ef210adb4058726da721d87f49debb (diff) | |
Deleted file
Diffstat (limited to 'lib/xmlunicode.el')
| -rw-r--r-- | lib/xmlunicode.el | 786 |
1 files changed, 0 insertions, 786 deletions
diff --git a/lib/xmlunicode.el b/lib/xmlunicode.el deleted file mode 100644 index cc19d9cf..00000000 --- a/lib/xmlunicode.el +++ /dev/null @@ -1,786 +0,0 @@ -;;; xmlunicode.el --- Unicode support for XML -*- coding: utf-8 -*- - -;; $Id$ - -;; Copyright (C) 2003 Norman Walsh -;; Inspired in part by sgml-input, Copyright (C) 2001 Dave Love -;; Inspired in part by http://www.tbray.org/ongoing/When/200x/2003/09/27/UniEmacs - -;; Author: Norman Walsh <ndw@nwalsh.com> -;; Maintainer: Norman Walsh <ndw@nwalsh.com> -;; Created: 2004-07-21 -;; Version: 1.6 -;; CVS ID: $Id$ -;; Keywords: utf-8 unicode xml characters - -;; This file is NOT part of GNU emacs. - -;; This is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. - -;; This software is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. - -;;; Commentary - -;; This file provides a suite of functions designed to make it easier -;; to enter Unicode into Emacs. It is not, in fact, particularly XML-specific though -;; it does define an 'xml input-mode and does support the ISO 8879 entity names. - -;;; Usage - -;; 1. Before loading this file, make sure that the variable unicode-character-list is -;; defined. The unicode-character-list is a list of triples of the form: -;; -;; (codepoint "unicode name" "iso name") ; iso name can be nil -;; -;; e.g.: (defvar unicode-character-list -;; '( -;; ;Codept Unicode name ISO Name -;; (#x000000 "NULL" nil ) -;; (#x000001 "START OF HEADING" nil ) -;; ... -;; (#x0000a0 "NO-BREAK SPACE" "nbsp" ) -;; (#x0000a1 "INVERTED EXCLAMATION MARK" "iexcl" ) -;; (#x0000a2 "CENT SIGN" "cent" ) -;; ...)) -;; -;; -;; The easiest way to define this list is to load "unichars.el" -;; which should be available where you got this file. -;; -;; 2. Bind the functions defined in this file to keys you find convenient. -;; -;; The likely candidates are: -;; -;; unicode-character-insert insert a character by unicode name -;; (with completion) -;; iso8879-character-insert insert a character by ISO entity name -;; (with completion) -;; unicode-smart-double-quote inserts an appropriate double quote -;; unicode-smart-single-quote inserts an appropriate single quote -;; unicode-character-menu-insert choose special character from a popup menu -;; unicode-character-shortcut-insert enter a two-character shortcut for a -;; unicode character -;; -;; You can also create a standard Emacs menu for the character menu list -;; (instead of, or in addition to, the popup). To do that: -;; -;; (define-key APPROPRIATE-MAP [menu-bar unichar] -;; (cons "UniChar" unicode-character-menu-map)) -;; -;; Where APPROPRIATE-MAP is the name of the emacs keymap to bind into -;; -;; 3. If you want to use the xml input-mode, which provides automatic replacement for the -;; ISO entity names: -;; -;; (set-input-method 'xml) -;; -;; in the appropriate context. Unlike sgml-input, xml-input only inserts the -;; characters for which you have glyphs. It inserts other characters as numeric -;; character references. (If you want to insert a literal character even if -;; you don't have it in your fonts, use unicode-character-insert or -;; iso8879-character-insert with a prefix.) - -;;; Changes - -;; v1.7 -;; Require "cl" because, well, because it's required. Also fiddled with -;; the way single quotes are handled; the apostrophe is now part of the -;; cycle -;; v1.6 -;; Remove debugging code. Embarrassed again. :-( -;; v1.5 -;; Fixed bug in unicode-smart-single-quote. It wasn't cycling through all -;; three quotes correctly because of a typo in the function definition. -;; Make sure smart semicolon insertion only happens if we're right at the -;; end of a numeric character reference. -;; v1.4 -;; Fixed bug in insert-smart-semicolon. It wasn't careful to tie the search -;; to the most recent preceding ampersand. -;; v1.3 -;; Fixed bug in (in-comment) -;; Added unicode-smart-semicolon as another convenience for entering Unicode chars -;; Added show-unicode-character-list -;; v1.2 -;; Added unicode-smart-hyphen for easy insert of mdash and ndash -;; Added unicode-smart-period for easy insert of hellip -;; Fixed a bug in unicode-smart-single-quote -;; v1.1 -;; Fixed a few bugs with respect to how numeric character references are entered. -;; Added xml-tag-search-limit and unicode-charref-format -;; v1.0 -;; First release. Nearly a complete rewrite from the former xmlchars.el file - -;;; Code: - -(require 'cl) - -(defvar unicode-ldquo (decode-char 'ucs #x00201c)) -(defvar unicode-rdquo (decode-char 'ucs #x00201d)) -(defvar unicode-lsquo (decode-char 'ucs #x002018)) -(defvar unicode-rsquo (decode-char 'ucs #x002019)) -(defvar unicode-quot (decode-char 'ucs #x000022)) -(defvar unicode-apos (decode-char 'ucs #x000027)) -(defvar unicode-capos (decode-char 'ucs #x0002bc)) -(defvar unicode-ndash (decode-char 'ucs #x002013)) -(defvar unicode-mdash (decode-char 'ucs #x002014)) -(defvar unicode-hellip (decode-char 'ucs #x002026)) - -(defvar unicode-charref-format "&#x%x;" - "The format for numeric character references") - -(defvar xml-tag-search-limit 4096 - "Maximum distance to search from point for tag start characters") - -(defvar unicode-character-list-file "/define/this/before/you/load/me" - "The name of the file that contains your unicode-character-list. unichars.el should be available where you got this file.") - -(if (not (boundp 'unicode-character-list)) - (load-file unicode-character-list-file)) - -(defvar unicode-character-alist '() - "Mapping of Unicode character names to codepoints.") - -(let ((ulist unicode-character-list)) - (setq unicode-character-alist - (list (cons (cadr (car ulist)) (car (car ulist))))) - (setq ulist (cdr ulist)) - (while ulist - (nconc unicode-character-alist - (list (cons (cadr (car ulist)) (car (car ulist))))) - (setq ulist (cdr ulist)))) - -(defvar iso8879-character-alist '() - "Mapping of ISO 8879 entity names names to codepoints.") - -(let ((ulist unicode-character-list)) - (while (and ulist (not (caddr (car ulist)))) - (setq ulist (cdr ulist))) - (setq iso8879-character-alist - (list (cons (caddr (car ulist)) (car (car ulist))))) - (setq ulist (cdr ulist)) - (while ulist - (if (caddr (car ulist)) - (nconc iso8879-character-alist - (list (cons (caddr (car ulist)) (car (car ulist)))))) - (setq ulist (cdr ulist)))) - -(defun iso8879-to-codepoints (&optional isolist) - "Converts a list of ISO 8879 entity names to a list of codepoints. This is a convenience function for defining the glyph list." - (let (codepoint-list) - (setq codepoint-list (list 0)) - (while isolist - (nconc codepoint-list - (list (cdr (assoc (car isolist) iso8879-character-alist)))) - (setq isolist (cdr isolist))) - (cdr codepoint-list))) - -(defun unicode-to-codepoints (&optional unilist) - "Converts a list of Unicode character names to a list of codepoints. This is a convenience function for defining the glyph list." - (let (codepoint-list) - (setq codepoint-list (list 0)) - (while unilist - (nconc codepoint-list - (list (cdr (assoc (car isolist) unicode-character-alist)))) - (setq unilist (cdr unilist))) - (cdr codepoint-list))) - -(defvar unicode-glyph-list - (append - '(?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M - ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z - ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m - ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z - ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?! ?@ ?# - ?$ ?% ?^ ?* ?( ?) ?- ?_ ?= ?+ ?\ ?| - ?[ ?] ?{ ?} 59 ?: ?/ ?? ?. 44 96 126) - (iso8879-to-codepoints - '("AElig" "Aacute" "Abreve" "Acirc" "Agrave" "Amacr" "Aogon" - "Aring" "Atilde" "Auml" "Cacute" "Ccaron" "Ccedil" "Ccirc" - "Cdot" "Dagger" "Dcaron" "Dot" "Dstrok" "ENG" "ETH" - "Eacute" "Ecaron" "Ecirc" "Edot" "Egrave" "Emacr" "Eogon" - "Euml" "Gbreve" "Gcedil" "Gcirc" "Gdot" "Hcirc" - "Hstrok" "IJlig" "Iacute" "Icirc" "Idot" "Igrave" - "Imacr" "Iogon" "Itilde" "Iuml" "Jcirc" "Kcedil" - "Lacute" "Lcaron" "Lcedil" "Lmidot" "Lstrok" "Nacute" - "Ncaron" "Ncedil" "Ntilde" "OElig" "Oacute" "Ocirc" - "Odblac" "Ograve" "Omacr" "Oslash" "Otilde" "Ouml" - "Racute" "Rcaron" "Rcedil" "Sacute" "Scaron" "Scedil" - "Scirc" "THORN" "Tcaron" "Tcedil" "Tstrok" "Uacute" - "Ubreve" "Ucirc" "Udblac" "Ugrave" "Umacr" "Uogon" - "Uring" "Utilde" "Uuml" "Wcirc" "Yacute" "Ycirc" - "Yuml" "Zacute" "Zcaron" "Zdot" "aacute" "abreve" - "acirc" "acute" "aelig" "agrave" "amacr" "angst" - "aogon" "aring" "ast" "atilde" "auml" - "b.mu" "bdquo" "blank" "blk12" "blk14" "blk34" - "block" "boxDL" "boxDR" "boxH" "boxHD" "boxHU" - "boxUL" "boxUR" "boxV" "boxVH" "boxVL" "boxVR" - "boxVh" "boxdl" "boxdr" "boxh" "boxhd" "boxhu" - "boxul" "boxur" "boxv" "boxvH" "boxvh" "boxvl" - "boxvr" "breve" "brvbar" "bsol" "bull" "cacute" - "caron" "ccaron" "ccedil" "ccirc" "cdot" "cedil" - "cent" "circ" "colon" "comma" "commat" "copy" - "curren" "dagger" "dash" "dblac" "dcaron" "deg" - "die" "divide" "dollar" "dot" "dstrok" "eacute" - "ecaron" "ecirc" "edot" "egrave" "emacr" "emsp" - "emsp13" "emsp14" "eng" "ensp" "eogon" "equals" - "equiv" "eth" "euml" "excl" "exist" "fnof" - "forall" "frac12" "frac14" "frac34" "frasl" "gacute" - "gbreve" "gcedil" "gcirc" "gdot" "ge" "ges" - "grave" "hairsp" "half" "hcirc" "hellip" - "horbar" "hstrok" "hyphen" "iacute" "icirc" "iexcl" - "igrave" "ijlig" "imacr" "inodot" "inodot" "iogon" - "iquest" "itilde" "iuml" "jcirc" "kcedil" "kgreen" - "lacute" "laquo" "lcaron" "lcedil" "lcub" "ldquo" - "ldquor" "le" "les" "lhblk" "lmidot" "lowbar" - "lpar" "lsaquo" "lsqb" "lsquo" "lsquor" "lstrok" - "macr" "mdash" "mgr" "micro" "middot" "minus" - "mldr" "mu" "nacute" "napos" "nbsp" "ncaron" - "ncedil" "ndash" "ne" "nequiv" "nexist" "nge" - "nges" "ngt" "nle" "nles" "nlt" "not" - "ntilde" "num" "numsp" "oacute" "ocirc" "odblac" - "oelig" "ogon" "ograve" "omacr" "ordf" "ordm" - "oslash" "otilde" "ouml" "para" "percnt" "period" - "permil" "plus" "plusmn" "pound" "puncsp" "quest" - "racute" "raquo" "rcaron" "rcedil" "rcub" - "rdquo" "rdquor" "reg" "ring" "rpar" "rsaquo" - "rsqb" "rsquo" "rsquor" "sacute" "sbquo" "sbsol" - "scaron" "scedil" "scirc" "sect" "semi" "shy" - "sol" "sup1" "sup2" "sup3" "szlig" "tcaron" - "tcedil" "thinsp" "thorn" "tilde" "times" "trade" - "tstrok" "uacute" "ubreve" "ucirc" "udblac" "ugrave" - "uhblk" "umacr" "uml" "uogon" "uring" "utilde" - "uuml" "verbar" "wcirc" "wedgeq" "yacute" "ycirc" - "yen" "yuml" "zacute" "zcaron" "zdot"))) - "A list of Unicode codepoints identifying the characters that display correctly in your Emacs with your fonts.") - -;; Insert characters by Unicode name (with completion) - -(defun unicode-character-insert (arg &optional argname) - "Insert a Unicode character by character name. If a prefix is given, the character will be inserted regardless of whether or not it has a displayable glyph; otherwise, a numeric character reference is inserted if the codepoint is not in the unicode-glyph-list. If argname is given, it is used for the prompt. If argname uniquely identifies a character, that character is inserted without the prompt." - (interactive "P") - (let* ((completion-ignore-case t) - (uniname (if (stringp argname) argname "")) - (charname - (if (eq (try-completion uniname unicode-character-alist) t) - uniname - (completing-read - "Unicode name: " - unicode-character-alist - nil t uniname))) - codepoint glyph) - (setq codepoint (cdr (assoc charname unicode-character-alist))) - (xml-unicode-insert arg codepoint))) - -;; Insert characters by iso8879 name - -(defun iso8879-character-insert (arg &optional argname) - "Insert a Unicode character by ISO 8879 entity name. If a prefix is given, the character will be inserted regardless of whether or not it has a displayable glyph; otherwise, a numeric character reference is inserted if the codepoint is not in the unicode-glyph-list. If argname is given, it is used for the prompt. If argname uniquely identifies a character, that character is inserted without the prompt." - (interactive "P") - (let* ((isoname (if (stringp argname) argname "")) - (charname - (if (eq (try-completion isoname iso8879-character-alist) t) - isoname - (completing-read - "ISO name: " - iso8879-character-alist - nil t isoname))) - codepoint glyph) - (setq codepoint (cdr (assoc charname iso8879-character-alist))) - (xml-unicode-insert arg codepoint))) - -(defun xml-unicode-insert (arg codepoint) - "Insert the Unicode character identified by codepoint taking into account available glyphs and XML predefined entities." - (interactive "P") - (let ((glyph (memq codepoint unicode-glyph-list))) - (cond - ((and (decode-char 'ucs codepoint) (or arg glyph)) - (ucs-insert codepoint)) - ((= codepoint 34) - (insert """)) - ((= codepoint 38) - (insert "&")) - ((= codepoint 39) - (insert "'")) - ((= codepoint 60) - (insert "<")) - ((= codepoint 62) - (insert ">")) - (t - (insert (format unicode-charref-format codepoint)))))) - -;; Menus - -(defvar unicode-character-menu-alist - '( - ("angst" . #x212B) - ("cent" . #x00A2) - ("copy" . #x00A9) - ("Dagger" . #x2021) - ("dagger" . #x2020) - ("deg" . #x00B0) - ("emsp" . #x2003) - ("ensp" . #x2002) - ("ETH" . #x00D0) - ("eth" . #x00F0) - ("euro" . #x20AC) - ("half" . #x00BD) - ("laquo" . #x00AB) - ("ldquo" . #x201c) - ("lsquo" . #x2018) - ("mdash" . #x2014) - ("micro" . #x00B5) - ("middot" . #x00B7) - ("nbsp" . #x00A0) - ("ndash" . #x2013) - ("not" . #x00AC) - ("numsp" . #x2007) - ("para" . #x00B6) - ("permil" . #x2030) - ("puncsp" . #x2008) - ("raquo" . #x00BB) - ("rdquo" . #x201d) - ("rsquo" . #x2019) - ("reg" . #x00AE) - ("sect" . #x00A7) - ("THORN" . #x00DE) - ("thorn" . #x00FE) - ("trade" . #x2122) - ) - "Mapping of names to codepoints for use in the popup or Emacs menu.") - -(defun unicode-character-menu-insert () - "Popup a menu for inserting unicode characters." - (interactive) - (let* ((xml-chars-menu - (list "Special char" (append (list "") unicode-character-menu-alist))) - (value (x-popup-menu t xml-chars-menu))) - (if value (xml-unicode-insert nil value)))) - -(defvar unicode-character-menu-map (make-sparse-keymap "UniChar") - "A menu map for inserting Unicode characters.") - -(defun make-unicode-character-menu-bar () - "Builds the unicode-character-menu-map for the currently defined unicode-character-menu-alist." - (let ((alist (reverse unicode-character-menu-alist)) - name codepoint) - (setq unicode-character-menu-map (make-sparse-keymap "UniChar")) - (while alist - (setq name (car (car alist)) - codepoint (cdr (car alist))) - (define-key unicode-character-menu-map (vector (intern name)) - `(,name . (lambda () (interactive) (xml-unicode-insert nil ,codepoint)))) - (setq alist (cdr alist))))) - -(make-unicode-character-menu-bar) - -;; Simple XML tests - -(defun in-start-tag () - "Crude test to see if point is inside an open start tag." - (interactive) - (let (slim here pgt plt) - (setq here (point)) - (setq slim - (if (> here xml-tag-search-limit) - (- here xml-tag-search-limit) - 0)) - (setq pgt (search-backward ">" slim t)) - (goto-char here) - (setq plt (search-backward "<" slim t)) - (goto-char here) - (if (and pgt plt) - (> plt pgt) - plt))) - -(defun after-start-tag () - "Crude test to see if point is just after a start tag" - (interactive) - (if (and (char-before) (char-equal (char-before) ?>)) - (let (slim here plt psl) - (setq here (point)) - (setq slim - (if (> here xml-tag-search-limit) - (- here xml-tag-search-limit) - 0)) - (setq plt (search-backward "<" slim t)) - (goto-char here) - (setq psl (search-backward "/" slim t)) - (goto-char here) - (or (and plt (not psl)) - (and plt psl (< psl plt)))))) - -(defun in-comment () - "Crude test to see if point is inside a comment." - (interactive) - (let (slim here pgt pcmt) - (setq here (point)) - (setq slim - (if (> here xml-tag-search-limit) - (- here xml-tag-search-limit) - 0)) - (setq pgt (search-backward "-->" slim t)) - (goto-char here) - (setq pcmt (search-backward "<!" slim t)) - (goto-char here) - (if (and pgt pcmt) - (> pcmt pgt) - pcmt))) - -;;stolen from hen.el which in turn claims to have stolen it from cxref -(defun unicode-looking-backward-at (regexp) - "Return t if text before point matches regular expression REGEXP. -This function modifies the match data that `match-beginning', -`match-end' and `match-data' access; save and restore the match -data if you want to preserve them." - (save-excursion - (let ((here (point))) - (if (re-search-backward regexp (point-min) t) - (if (re-search-forward regexp here t) - (= (point) here)))))) - -;; Smart quotes - -(defun unicode-smart-double-quote () - "Insert a left or right double quote as appropriate. Left quotes are inserted after a space, newline, or start tag. Right quotes are inserted after any other character, except if the preceding character is a quote, in which case we cycle through the three quote styles." - (interactive) - (if (char-before) - (let ((ch (char-before))) - (cond - ((in-start-tag) - (insert "\"")) - ((or - (after-start-tag) - (char-equal ch 40) ; ( - (char-equal ch 91) ; [ - (char-equal ch ?{)) ; { - (insert unicode-ldquo)) - ((or - (char-equal ch ?>) ; > - (char-equal ch 41) ; ) - (char-equal ch 93) ; ] - (char-equal ch ?})) ; } - (insert unicode-rdquo)) - ((or (char-equal ch 32) - (char-equal ch 10)) - (insert unicode-ldquo)) - ((char-equal ch unicode-ldquo) - (progn - (delete-backward-char 1) - (insert "\""))) - ((char-equal ch unicode-quot) - (progn - (delete-backward-char 1) - (insert unicode-rdquo))) - ((char-equal ch unicode-rdquo) - (progn - (delete-backward-char 1) - (insert unicode-ldquo))) - ((char-equal ch unicode-ldquo) - (progn - (delete-backward-char 1) - (insert unicode-rdquo))) - ((char-equal ch unicode-lsquo) - (insert unicode-ldquo)) - (t (insert unicode-rdquo)))) - (insert unicode-ldquo))) - -(defun unicode-smart-single-quote () - "Insert a left or right single quote, or an apostrophe, as appropriate. Left quotes are inserted after a space, newline, or start tag. An apostrophe is inserted after any other character, except if the preceding character is a quote or apostrophe, in which case we cycle through the styles." - (interactive) - (if (char-before) - (let ((ch (char-before))) - (cond - ((in-start-tag) - (insert "'")) - ((or - (after-start-tag) - (char-equal ch 40) ; ( - (char-equal ch 91) ; [ - (char-equal ch ?{)) ; { - (insert unicode-lsquo)) - ((or - (char-equal ch ?>) ; > - (char-equal ch 41) ; ) - (char-equal ch 93) ; ] - (char-equal ch ?})) ; } - (insert unicode-rsquo)) - ((or (char-equal ch 32) - (char-equal ch 10)) - (insert unicode-lsquo)) - ((char-equal ch unicode-apos) ; ' -> rsquo - (progn - (delete-backward-char 1) - (insert unicode-rsquo))) - ((char-equal ch unicode-rsquo) ; rsquo -> lsquo - (progn - (delete-backward-char 1) - (insert unicode-lsquo))) - ((char-equal ch unicode-lsquo) ; lsquo -> ' - (progn - (delete-backward-char 1) - (insert unicode-apos))) - (t (insert unicode-apos)))) - (insert unicode-lsquo))) - -(defun unicode-smart-hyphen () - "Insert a hyphen, mdash, or ndash as appropriate. A hyphen, an mdash, and then an ndash is inserted." - (interactive) - (if (char-before) - (let ((ch (char-before))) - (cond - ((in-comment) - (insert "-")) - ((char-equal ch ?-) - (progn - (delete-backward-char 1) - (insert unicode-mdash))) - ((char-equal ch unicode-mdash) - (progn - (delete-backward-char 1) - (insert unicode-ndash))) - ((char-equal ch unicode-ndash) - (progn - (delete-backward-char 1) - (insert "-"))) - (t (insert "-")))) - (insert "-"))) - -(defun unicode-smart-period () - "Insert an hellipsis for three dots." - (interactive) - (if (> (point) 2) - (let ((ch1 (char-before)) - (ch2 (char-before (- (point) 1))) - (ch3 (char-before (- (point) 2)))) - (cond - ((in-comment) - (insert ".")) - ((char-equal ch1 unicode-hellip) - (progn - (delete-backward-char 1) - (insert "...."))) - ((and ch3 (char-equal ch1 ?.) (char-equal ch2 ?.) (char-equal ch3 ?.)) - (insert ".")) - ((and (char-equal ch1 ?.) (char-equal ch2 ?.)) - (progn - (delete-backward-char 2) - (insert unicode-hellip))) - (t (insert ".")))) - (insert "."))) - -(defun unicode-smart-semicolon () - "Detect numeric character references and replace them with the appropriate char." - (interactive) - (let ((pos (point)) - amppos codept) - (search-backward "&" nil t nil) - (setq amppos (point)) - (goto-char pos) - (cond - ((unicode-looking-backward-at "&#[xX][0-9a-fA-F]+") - (progn - (re-search-backward "&#[xX]\\([0-9a-fA-F]+\\)" nil t nil) - (if (= amppos (point)) - (progn - (setq codept (string-to-number (match-string 1) 16)) - (if (memq codept unicode-glyph-list) - (replace-match (format "%c" (decode-char 'ucs codept))) - (progn - (goto-char pos) - (insert ";")))) - (progn - (goto-char pos) - (insert ";"))))) - ((unicode-looking-backward-at "&#[0-9]+") - (progn - (re-search-backward "&#\\([0-9]+\\)" nil t nil) - (if (= amppos (point)) - (progn - (setq codept (string-to-number (match-string 1) 10)) - (if (memq codept unicode-glyph-list) - (replace-match (format "%c" (decode-char 'ucs codept))) - (progn - (goto-char pos) - (insert ";")))) - (progn - (goto-char pos) - (insert ";"))))) - (t - (insert ";"))))) - -;; Setup quail for XML mode - -(require 'quail) - -(quail-define-package - "xml" "UTF-8" "&" t - "Unicode characters input method using ISO 8879 entitie names from the unicode-character-list" - nil t nil nil nil nil nil nil nil nil t) - -(defvar xml-quail-define-rules '() - "The default xml-input rules. Built dynamically from the unicode-character-list and the unicode-glyph-list.") - -(let ((ulist iso8879-character-alist) - codepoint glyph entname) - (setq xml-quail-define-rules (list 'quail-define-rules)) - (while ulist - (setq codepoint (cdr (car ulist))) - (setq glyph (memq codepoint unicode-glyph-list)) - (setq entname (concat "&" (car (car ulist)) ";")) - (cond - ((and glyph (decode-char 'ucs codepoint)) - (nconc xml-quail-define-rules - (list (list entname (decode-char 'ucs codepoint))))) - ((= codepoint 34) - (nconc xml-quail-define-rules - (list (list entname (vector """))))) - ((= codepoint 38) - (nconc xml-quail-define-rules - (list (list entname (vector "&"))))) - ((= codepoint 39) - (nconc xml-quail-define-rules - (list (list entname (vector "'"))))) - ((= codepoint 60) - (nconc xml-quail-define-rules - (list (list entname (vector "<"))))) - ((= codepoint 62) - (nconc xml-quail-define-rules - (list (list entname (vector ">"))))) - (t - (nconc xml-quail-define-rules - (list (list entname (vector (format unicode-charref-format codepoint))))))) - (setq ulist (cdr ulist)))) - -(eval xml-quail-define-rules) - -;; Read two keys - -(defvar unicode-character-shortcut-alist - (list - (cons "AE" (cdr (assoc "AElig" iso8879-character-alist))) - (cons "A'" (cdr (assoc "Aacute" iso8879-character-alist))) - (cons "A^" (cdr (assoc "Acirc" iso8879-character-alist))) - (cons "A`" (cdr (assoc "Agrave" iso8879-character-alist))) - (cons "Ao" (cdr (assoc "Aring" iso8879-character-alist))) - (cons "A~" (cdr (assoc "Atilde" iso8879-character-alist))) - (cons "A\"" (cdr (assoc "Auml" iso8879-character-alist))) - (cons "C," (cdr (assoc "Ccedil" iso8879-character-alist))) - (cons "E'" (cdr (assoc "Eacute" iso8879-character-alist))) - (cons "E^" (cdr (assoc "Ecirc" iso8879-character-alist))) - (cons "E`" (cdr (assoc "Egrave" iso8879-character-alist))) - (cons "E\"" (cdr (assoc "Euml" iso8879-character-alist))) - (cons "I'" (cdr (assoc "Iacute" iso8879-character-alist))) - (cons "I^" (cdr (assoc "Icirc" iso8879-character-alist))) - (cons "I`" (cdr (assoc "Igrave" iso8879-character-alist))) - (cons "I\"" (cdr (assoc "Iuml" iso8879-character-alist))) - (cons "N~" (cdr (assoc "Ntilde" iso8879-character-alist))) - (cons "O'" (cdr (assoc "Oacute" iso8879-character-alist))) - (cons "O^" (cdr (assoc "Ocirc" iso8879-character-alist))) - (cons "O`" (cdr (assoc "Ograve" iso8879-character-alist))) - (cons "O/" (cdr (assoc "Oslash" iso8879-character-alist))) - (cons "O~" (cdr (assoc "Otilde" iso8879-character-alist))) - (cons "O\"" (cdr (assoc "Ouml" iso8879-character-alist))) - (cons "U'" (cdr (assoc "Uacute" iso8879-character-alist))) - (cons "U^" (cdr (assoc "Ucirc" iso8879-character-alist))) - (cons "U`" (cdr (assoc "Ugrave" iso8879-character-alist))) - (cons "U\"" (cdr (assoc "Uuml" iso8879-character-alist))) - (cons "Y'" (cdr (assoc "Yacute" iso8879-character-alist))) - (cons "a'" (cdr (assoc "aacute" iso8879-character-alist))) - (cons "a^" (cdr (assoc "acirc" iso8879-character-alist))) - (cons "ae" (cdr (assoc "aelig" iso8879-character-alist))) - (cons "a`" (cdr (assoc "agrave" iso8879-character-alist))) - (cons "ao" (cdr (assoc "aring" iso8879-character-alist))) - (cons "a~" (cdr (assoc "atilde" iso8879-character-alist))) - (cons "a\"" (cdr (assoc "auml" iso8879-character-alist))) - (cons "c," (cdr (assoc "ccedil" iso8879-character-alist))) - (cons "e'" (cdr (assoc "eacute" iso8879-character-alist))) - (cons "e^" (cdr (assoc "ecirc" iso8879-character-alist))) - (cons "e`" (cdr (assoc "egrave" iso8879-character-alist))) - (cons "e\"" (cdr (assoc "euml" iso8879-character-alist))) - (cons "i'" (cdr (assoc "iacute" iso8879-character-alist))) - (cons "i^" (cdr (assoc "icirc" iso8879-character-alist))) - (cons "i`" (cdr (assoc "igrave" iso8879-character-alist))) - (cons "i\"" (cdr (assoc "iuml" iso8879-character-alist))) - (cons "n~" (cdr (assoc "ntilde" iso8879-character-alist))) - (cons "o'" (cdr (assoc "oacute" iso8879-character-alist))) - (cons "o^" (cdr (assoc "ocirc" iso8879-character-alist))) - (cons "o`" (cdr (assoc "ograve" iso8879-character-alist))) - (cons "o-" (cdr (assoc "omacr" iso8879-character-alist))) - (cons "o/" (cdr (assoc "oslash" iso8879-character-alist))) - (cons "o~" (cdr (assoc "otilde" iso8879-character-alist))) - (cons "o\"" (cdr (assoc "ouml" iso8879-character-alist))) - (cons "sz" (cdr (assoc "szlig" iso8879-character-alist))) - (cons "u'" (cdr (assoc "uacute" iso8879-character-alist))) - (cons "u^" (cdr (assoc "ucirc" iso8879-character-alist))) - (cons "u`" (cdr (assoc "ugrave" iso8879-character-alist))) - (cons "u\"" (cdr (assoc "uuml" iso8879-character-alist))) - (cons "y'" (cdr (assoc "yacute" iso8879-character-alist))) - (cons "y\"" (cdr (assoc "yuml" iso8879-character-alist))) - (cons "12" (cdr (assoc "frac12" iso8879-character-alist))) - (cons "13" (cdr (assoc "frac13" iso8879-character-alist))) - (cons "14" (cdr (assoc "frac14" iso8879-character-alist))) - (cons "15" (cdr (assoc "frac15" iso8879-character-alist))) - (cons "16" (cdr (assoc "frac16" iso8879-character-alist))) - (cons "18" (cdr (assoc "frac18" iso8879-character-alist))) - (cons "23" (cdr (assoc "frac23" iso8879-character-alist))) - (cons "25" (cdr (assoc "frac25" iso8879-character-alist))) - (cons "34" (cdr (assoc "frac34" iso8879-character-alist))) - (cons "35" (cdr (assoc "frac35" iso8879-character-alist))) - (cons "38" (cdr (assoc "frac38" iso8879-character-alist))) - (cons "45" (cdr (assoc "frac45" iso8879-character-alist))) - (cons "56" (cdr (assoc "frac56" iso8879-character-alist))) - (cons "58" (cdr (assoc "frac58" iso8879-character-alist))) - (cons "78" (cdr (assoc "frac78" iso8879-character-alist))) - (cons "<<" (cdr (assoc "laquo" iso8879-character-alist))) - (cons ".." (cdr (assoc "hellip" iso8879-character-alist))) - (cons "!i" (cdr (assoc "iexcl" iso8879-character-alist))) - (cons "?i" (cdr (assoc "iquest" iso8879-character-alist))) - (cons " " (cdr (assoc "nbsp" iso8879-character-alist))) - (cons "+-" (cdr (assoc "plusmn" iso8879-character-alist))) - (cons "--" (cdr (assoc "mdash" iso8879-character-alist))) - (cons "$c" (cdr (assoc "cent" iso8879-character-alist))) - (cons "$e" (cdr (assoc "euro" iso8879-character-alist))) - (cons "$p" (cdr (assoc "pound" iso8879-character-alist))) - (cons "$y" (cdr (assoc "yen" iso8879-character-alist)))) - "Defines a list of two-character shortcuts for keyboard entry of Unicode characters.") - -(defun unicode-character-shortcut-insert () - "Read a (two-character) keyboard shortcut and insert the corresponding character." - (interactive) - (let* ((c1 (read-char)) - (c2 (read-char)) - (str (concat (char-to-string c1) (char-to-string c2)))) - (cond - ((assoc str unicode-character-shortcut-alist) - (xml-unicode-insert nil - (cdr (assoc str unicode-character-shortcut-alist)))) - (t (beep))))) - -(defun show-unicode-character-list () - "Insert each Unicode character into a buffer. Let's you see which characters are available for literal display in your emacs font." - (let ((chars unicode-character-list) - char codept name) - (while chars - (setq char (car chars)) - (setq chars (cdr chars)) - (setq codept (car char)) - (setq name (cadr char)) - - (if (< codept #xffff) - (progn - (insert (format "#x%06x " codept)) - (ucs-insert codept) - (insert (format " %s\n" name))))))) - -;; EOF |
