summaryrefslogtreecommitdiff
path: root/elisp/erbot/contrib/translate.el
blob: f60d9ea1cd4d1b874912e98991fa2c2c6b5045cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
;; Emacs Lisp Archive Entry
;; Package: translate
;; Filename: translate.el
;; Version: 0.01
;; Keywords: natural language, language, translate, translation
;; Author: Vivek Dasmohapatra <vivek@etla.org>
;; Maintainer: Vivek Dasmohapatra <vivek@etla.org>
;; Created: 2006-05-10
;; Description: use gnome translate/libtranslate to translate text
;; Compatibility: Emacs21, Emacs22
;; Last modified: Fri 2006-05-12 02:52:44 +0100


;; Based on work by:
;; Deepak Goel <deego@gnufans.org>
;; Alejandro Benitez <benitezalejandrogm@gmail.com>

;; This file is NOT (yet) part of GNU Emacs.

;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; This is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; You need to install libtranslate for this to work.  The binary,
;; translate and the library libtranslate.so are provided (for example) 
;; in Ubuntu Dapper: http://packages.ubuntu.com/dapper/libs/libtranslate-bin

(defvar translate-version "0.01")

(defvar translate-pairs nil 
  "A cache for the language pairs. A list of entries of the form: \n
     '((fromaliases) (toaliases) (types)).\n
The first elements of fromaliases and toaliases are the canonical two letter
language codes (possibly with a -XX country variant extension). Any remaining
elements are human-readable aliases. (types) is a list of translation types, 
usually text, and occasionally web-page as well. No other types are currently 
known.")

(defvar translate-unsupported-langs '("he" "pap")
  "Languages (two/three letter codes) that we cannot utf-8 encode yet.")

(defgroup translate nil
  "Translate natural languages using gnome translate (or workalikes)."
  :group 'external
  :prefix "translate-")

(defcustom translate-program "translate" 
  "External translation program."
  :group 'translate
  :type '(choice string file))

(defun translate-req-to-pair (from to)
  "Taking a pair of string arguments, find a matching translation service
and return it as a cons of the form (\"origin\" . \"dest\")"
  (translate-load-pairs)
  (let ( (code nil) )
    (mapc (lambda (p) (if (and (member-ignore-case from (car  p)) 
                               (member-ignore-case to   (cadr p)))
                          (setq code (cons (caar p) (car (cadr p))) )) )
          translate-pairs)
    code))

(defun translate-full-name (code-or-name)
  "Return the full name of a language based on a code or one of its aliases."
  (interactive "sLanguage (eg en or zh-TW): ")
  (translate-load-pairs)
  (let ((name nil) (lang nil) (ldata translate-pairs))
    (while (and ldata (not name))
      (setq lang (car ldata) ldata (cdr ldata))
      (if (member-ignore-case code-or-name (car lang))
          (setq lang (car lang))
        (if (member-ignore-case code-or-name (cadr lang)) 
            (setq lang (cadr lang)) 
          (setq lang nil)))
      (when lang  
        (setq name (mapconcat (lambda (l) (format "%s" l)) (cdr lang) " ")) ))
    name))

(defconst translate-pair-regex 
  (concat "^\\([a-z]\\{2,3\\}\\(?:-..\\)?\\)" ;; language code (from)
          "\\s-+" 
          "(\\(.*\\))"                        ;; language names (from)
          "\\s-+->\\s-+" 
          "\\([a-z]\\{2,3\\}\\(?:-..\\)?\\)"  ;; language code (to)
          "\\s-+"
          "(\\(.*\\)):"                       ;; language aliases (to)
          "\\s-+"
          "\\(.*\\)"))                        ;; capabilities

(defun translate-parse-pair (pair-line)
  "Parse a line of output from `translate-program' --list-pairs, return
an element for insertion into `translate-pairs'."
  (if (string-match translate-pair-regex pair-line)
    (let ( (from       (match-string 1 pair-line))
           (from-alias (match-string 2 pair-line))
           (to         (match-string 3 pair-line))
           (to-alias   (match-string 4 pair-line))
           (cap        (match-string 5 pair-line)) 
           (cleanup    (lambda (x) (replace-regexp-in-string ",.*" "" x))) 
           (from-names nil) 
           (to-names   nil))
      (setq from-alias (split-string from-alias ";")
            to-alias   (split-string to-alias   ";")
            from-alias (mapcar cleanup from-alias)
            to-alias   (mapcar cleanup to-alias  )
            cap        (split-string cap ",\\s-+"))
      (mapc (lambda (x)
              (let ((pos 0))
                (while (setq pos (string-match "\\<\\(\\S-+\\)\\>" x pos))
                  (setq  from-names (cons (match-string 1 x) from-names)
                         pos        (match-end 1)) ))) 
            from-alias)
      (mapc (lambda (x)
              (let ((pos 0))
                (while (setq pos (string-match "\\<\\(\\S-+\\)\\>" x pos))
                  (setq to-names (cons (match-string 1 x) to-names)
                        pos      (match-end 1)) ))) 
            to-alias)
      (list (cons from from-names) 
            (cons to   to-names  ) cap))
    (message "%S does not match.\n" pair-line) nil))

(defun translate-load-pairs (&optional reload)
  "Parse the output of `translate-program' -l into `translate-pairs'
Called interactively with a prefix argument, or non-interactively with a 
non-nil reload argument, it will empty translate-pairs first. Otherwise,
if translate-pairs has already been loaded, it will not do anything."
  (interactive "P")
  (if reload (setq translate-pairs nil))
  (when (not translate-pairs)
    (let ( (y nil) 
           (pair-text (shell-command-to-string
                       (concat translate-program " -l"))) )
      (mapc
       (lambda (x) 
         (when (setq y (translate-parse-pair x)) 
           (setq translate-pairs (cons y translate-pairs))))
       (split-string pair-text "\n")) ))
  translate-pairs)

(defun translate-list-pairs (&optional from to)
  "Return the subset of `translate-pairs' that matches the FROM and TO
arguments."
  (if (string-match "^\\(?:\*\\|any\\|-\\|\\)$" from) (setq from nil))
  (if (string-match "^\\(?:\*\\|any\\|-\\|\\)$" to  ) (setq to   nil))
  (if (not (translate-load-pairs))
      (error "translate doesn't seem to have been setup - no languages found.")
    (cond 
     ( (and (not from) (not to)) ;; neither end point specified
       translate-pairs )
     ( (or (not to) (not from))  ;; one end point specified
       (let ( (op  (if from 'car 'cadr))
              (op2 (if from 'cadr 'car))
              (s   nil)
              (fl  (format "%s" (or from to))) )
         (mapc (lambda (p) (if (member-ignore-case fl (funcall op p)) 
                               (setq s (cons p s))))
               translate-pairs)
         s ))
     (t ;; fully spec'd translation 
      (let ( (s nil) (fl (format "%s" from)) (tl (format "%s" to  )) )
        (mapc (lambda (p) 
                (if (and (member-ignore-case fl (car  p)) 
                         (member-ignore-case tl (cadr p))) 
                    (setq s (cons p s)) )) 
              translate-pairs)
        s) )) ))

(defun translate (from to &rest text)
  "Given a language code or language name for the origin and destination 
languages FROM and TO (see `translate-pairs') and some TEXT, returns a string
containing the translated text from `translate-program' (gnome translate
or a work-alike). If an error occurs, either internally or while invoking 
`translate-program', signals an `error' instead."
  (setq text (mapconcat #'(lambda (arg) (format "%s" arg)) text " "))
  ;; =======================================================================
  ;; we might have to force the locale, according to the translate docs,
  ;; but this doesn't actually seem to be necessary at the moment.
  ;; -----------------------------------------------------------------------
  ;; call-process should use utf-8, that's what libtranslate wants: hence 
  ;; we set process-coding-system-alist.
  ;; -----------------------------------------------------------------------
  (let ( (from-lang (format "%s" from)) 
         (to-lang   (format "%s" to)) 
         (translation nil)  ;; translated text, or libtranslate error
         (code        nil)  ;; cons of (origin-lang . dest-lang)
         (status      nil) );; return code of command. 0 => success.
    (setq code (translate-req-to-pair from-lang to-lang)
          from (car code)
          to   (cdr code))
    (cond 
     ( (not code)
       (error "%s -> %s: no matching translation services found.\n" 
              (or (translate-full-name from-lang) from-lang)
              (or (translate-full-name to-lang  ) to-lang  )) )
     ( (member (car code) translate-unsupported-langs)
       (error "Sorry, unicode support for %s is not yet complete." 
              (translate-full-name from-lang)) )
     ( (member (cdr code) translate-unsupported-langs)
       (error "Sorry, unicode support for %s is not yet complete." 
              (translate-full-name to-lang)) )
     ( t
       (with-temp-buffer
         (let ( (lc-all (getenv "LC_ALL"))
                (lang   (getenv "LANG"))
                (coding-system-for-read  'utf-8)
                (coding-system-for-write 'utf-8)
                (process-coding-system-alist '("." . utf-8)) )
           (insert text)
           (setenv "LC_ALL" nil)
           (setenv "LANG"   "en_GB.UTF-8")
           (setq status
                 (call-process-region (point-min) (point-max)
                                      translate-program
                                      :delete-input (current-buffer) nil
                                      "-f" from "-t" to)
                 translation (buffer-substring-no-properties (point-min)
                                                             (point-max)))
           (setenv "LANG" lang)
           (setenv "LC_ALL" lc-all)
           )) ))
     (if (/= 0 status)
         (error "%d - %s" status translation))
     translation ))

(provide 'translate)