Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
e27db5a3
Kaydet (Commit)
e27db5a3
authored
Agu 13, 1999
tarafından
Barry Warsaw
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Initial implementation
üst
31e5e371
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
268 additions
and
0 deletions
+268
-0
pygettext.py
Tools/i18n/pygettext.py
+268
-0
No files found.
Tools/i18n/pygettext.py
Dosyayı görüntüle @
e27db5a3
#! /usr/bin/env python
"""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs. Martin
von Loewis' work[1] helps considerably in this regard.
There's one hole though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python introduces
a few wrinkles, such as dual quoting characters, triple quoted strings, and
raw strings. xgettext understands none of this.
Enter pygettext, which uses Python's standard tokenize module to scan Python
source code, generating .pot files identical to what GNU xgettext[2] generates
for C and C++ code. From there, the standard GNU tools can be used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
gettext_noop. But those can be a lot of text to include all over your code.
C and C++ have a trick: they use the C preprocessor. Most internationalized C
source includes a #define for gettext() to _() so that what has to be written
in the source is much less. Thus these are both translatable strings:
gettext("Translatable String")
_("Translatable String")
Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
wherever possible.
Usage: pygettext [options] filename ...
Options:
-a
--extract-all
Extract all strings
-d default-domain
--default-domain=default-domain
Rename the default output file from messages.pot to default-domain.pot
-k [word]
--keyword[=word]
Additional keywords to look for. Without `word' means not to use the
default keywords. The default keywords, which are always looked for
if not explicitly disabled: _
The default keyword list is different than GNU xgettext. You can have
multiple -k flags on the command line.
--no-location
Do not write filename/lineno location comments
-n [style]
--add-location[=style]
Write filename/lineno location comments indicating where each
extracted string is found in the source. These lines appear before
each msgid. Two styles are supported:
Solaris # File: filename, line: line-number
Gnu #: filename:line
If style is omitted, Gnu is used. The style name is case
insensitive. By default, locations are included.
--help
-h
print this help message and exit
"""
import
os
import
sys
import
string
import
time
import
getopt
import
tokenize
# Tool version; interpolated into the header of every generated POT file.
__version__ = '0.1'
def usage(code, msg=''):
    """Emit the module help text, an optional message, and exit with `code'."""
    # The module docstring doubles as the help text; the % interpolation is a
    # no-op today but lets the text reference module globals if needed.
    sys.stdout.write((__doc__ % globals()) + '\n')
    if msg:
        sys.stdout.write(str(msg) + '\n')
    sys.exit(code)
def normalize(s):
    """Convert a Python string literal into a .po-style quoted string.

    `s' is the raw source text of a string literal (quotes included), as
    produced by the tokenize module.  The return value is the same string
    re-quoted C-style in double quotes, with embedded backslashes and double
    quotes escaped, which is the format .po files expect.

    BUG FIX: the original backslash-counting loop tested s[i] -- the double
    quote just found -- instead of the characters preceding it, so the count
    was always zero (dead code), and literal backslashes in the string value
    were never escaped, yielding invalid .po entries.  Escaping backslashes
    first and then quotes handles both correctly.
    """
    # Unwrap the quoting and resolve source-level escapes by evaluating the
    # literal.  Builtins are stripped from the environment so a malicious
    # token cannot call anything; a plain string literal evaluates safely.
    s = eval(s, {'__builtins__': {}}, {})
    # Escape backslashes before quotes so the inserted escape characters are
    # not themselves re-escaped.
    s = s.replace('\\', '\\\\')
    s = s.replace('"', '\\"')
    return '"' + s + '"'
class TokenEater:
    """Consume tokens from tokenize.tokenize(), collecting translatable strings.

    An instance is the callback passed to tokenize.tokenize().  It runs a
    small state machine recognizing calls of the form keyword(<strings>)
    (e.g. _('text')) and records each extracted message together with every
    (filename, lineno) location where it occurred.  write() dumps the
    accumulated catalog in POT format.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}            # msgid -> list of (filename, lineno)
        self.__state = self.__waiting   # current state-machine handler
        self.__data = []                # string fragments of the current call
        self.__lineno = -1              # line where the current call started

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state handler; stup[0] is the start lineno.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        # Outside any call: wait for one of the keywords (e.g. '_').
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen

    def __keywordseen(self, ttype, tstring, lineno):
        # A keyword only counts if it is immediately followed by '('.
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the
            # list of messages seen.  Reset state for the next batch.  If
            # there were no strings inside _(), just ignore this entry.
            if self.__data:
                msg = ''.join(self.__data)
                entry = (self.__curfile, self.__lineno)
                self.__messages.setdefault(msg, []).append(entry)
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            # Adjacent string literals are implicitly concatenated.
            self.__data.append(normalize(tstring))
        # TBD: should we warn if we see anything else?

    def set_filename(self, filename):
        # Record the file subsequent tokens come from (for location comments).
        self.__curfile = filename

    def write(self, fp):
        """Write the collected message catalog to `fp' in POT format.

        BUG FIX: the original redirected sys.stdout to fp and, in the
        finally clause, restored sys.__stdout__ rather than the previously
        installed stream -- clobbering any redirection a caller had set up,
        and mutating global state for the duration.  Writing directly to fp
        produces byte-identical output without touching sys.stdout.
        """
        options = self.__options
        timestamp = time.ctime(time.time())
        # Common header.
        fp.write('# POT file generated by pygettext.py %s\n' % __version__)
        fp.write('# %s\n' % timestamp)
        fp.write('#\n')
        for k, v in self.__messages.items():
            for filename, lineno in v:
                # Location comments are different b/w Solaris and GNU.
                if options.location == options.SOLARIS:
                    fp.write('# File: %s, line: %d\n' % (filename, lineno))
                elif options.location == options.GNU:
                    fp.write('#: %s:%d\n' % (filename, lineno))
            # TBD: sorting, normalizing
            fp.write('msgid %s\n' % k)
            fp.write('msgstr \n')
            fp.write('\n')
def
main
():
default_keywords
=
[
'_'
]
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
'k:d:n:h'
,
[
'keyword'
,
'default-domain'
,
'help'
,
'add-location='
,
'no-location'
])
except
getopt
.
error
,
msg
:
usage
(
1
,
msg
)
# for holding option values
class
Options
:
# constants
GNU
=
1
SOLARIS
=
2
# defaults
keywords
=
[]
outfile
=
'messages.pot'
location
=
GNU
options
=
Options
()
locations
=
{
'gnu'
:
options
.
GNU
,
'solaris'
:
options
.
SOLARIS
,
}
# parse options
for
opt
,
arg
in
opts
:
if
opt
in
(
'-h'
,
'--help'
):
usage
(
0
)
elif
opt
in
(
'-k'
,
'--keyword'
):
if
arg
is
None
:
default_keywords
=
[]
options
.
keywords
.
append
(
arg
)
elif
opt
in
(
'-d'
,
'--default-domain'
):
options
.
outfile
=
arg
+
'.pot'
elif
opt
in
(
'-n'
,
'--add-location'
):
if
arg
is
None
:
arg
=
'gnu'
try
:
options
.
location
=
locations
[
string
.
lower
(
arg
)]
except
KeyError
:
usage
(
1
,
'Invalid value for --add-location: '
+
arg
)
elif
opt
in
(
'--no-location'
,):
options
.
location
=
0
# calculate all keywords
options
.
keywords
.
extend
(
default_keywords
)
# slurp through all the files
eater
=
TokenEater
(
options
)
for
filename
in
args
:
fp
=
open
(
filename
)
eater
.
set_filename
(
filename
)
tokenize
.
tokenize
(
fp
.
readline
,
eater
)
fp
.
close
()
fp
=
open
(
options
.
outfile
,
'w'
)
eater
.
write
(
fp
)
fp
.
close
()
# Allow use both as a standalone script and as an importable module.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment