mirror of https://github.com/tLDP/LDP
130 lines
4.8 KiB
Perl
Executable File
130 lines
4.8 KiB
Perl
Executable File
#!/usr/bin/perl
#
# Replace ISO-8859-2 (Latin-2) characters with the corresponding SGML
# character entity references (ISO 8879 "Added Latin 2" names).
#
# usage: Lat2ent.pl <latin2.sgml >entities.sgml
#
# The input is treated as raw bytes.  Every byte listed in %entity_for is
# rewritten as "&name;"; all other bytes are passed through unchanged.
#
# TODO: complete the characters that are not mapped yet.  The entities below
# were commented out (with placeholder ASCII patterns) in the earlier
# version of this script; as far as can be told none of them has a code
# point in ISO-8859-2, so they cannot be matched as single input bytes:
#   amacr Amacr ccirc Ccirc cdot Cdot edot Edot emacr Emacr gacute gbreve
#   Gbreve Gcedil gcirc Gcirc gdot Gdot hcirc Hcirc hstrok Hstrok Idot Imacr
#   imacr ijlig IJlig inodot iogon Iogon itilde Itilde jcirc Jcirc kcedil
#   Kcedil kgreen lcedil Lcedil lmidot Lmidot eng ENG napos ncedil Ncedil
#   Omacr omacr oelig OElig rcedil Rcedil scirc Scirc tstrok Tstrok ubreve
#   Ubreve umacr Umacr uogon Uogon utilde Utilde wcirc Wcirc ycirc Ycirc Yuml

use strict;
use warnings;

# ISO-8859-2 byte => SGML entity name.  The keys are written as hex escapes
# so that this file stays pure ASCII and the mapping does not depend on the
# encoding the script itself happens to be saved in.
my %entity_for = (
    "\xE3" => 'abreve',    # small a, breve
    "\xC3" => 'Abreve',    # capital A, breve
    "\xB1" => 'aogon',     # small a, ogonek
    "\xA1" => 'Aogon',     # capital A, ogonek
    "\xE6" => 'cacute',    # small c, acute accent
    "\xC6" => 'Cacute',    # capital C, acute accent
    "\xE8" => 'ccaron',    # small c, caron
    "\xC8" => 'Ccaron',    # capital C, caron
    "\xEF" => 'dcaron',    # small d, caron
    "\xCF" => 'Dcaron',    # capital D, caron
    "\xF0" => 'dstrok',    # small d, stroke
    "\xD0" => 'Dstrok',    # capital D, stroke
    "\xEC" => 'ecaron',    # small e, caron
    "\xCC" => 'Ecaron',    # capital E, caron
    "\xEA" => 'eogon',     # small e, ogonek
    "\xCA" => 'Eogon',     # capital E, ogonek
    "\xE5" => 'lacute',    # small l, acute accent
    "\xC5" => 'Lacute',    # capital L, acute accent
    "\xB5" => 'lcaron',    # small l, caron
    "\xA5" => 'Lcaron',    # capital L, caron
    "\xB3" => 'lstrok',    # small l, stroke
    "\xA3" => 'Lstrok',    # capital L, stroke
    "\xF1" => 'nacute',    # small n, acute accent
    "\xD1" => 'Nacute',    # capital N, acute accent
    "\xF2" => 'ncaron',    # small n, caron
    "\xD2" => 'Ncaron',    # capital N, caron
    "\xF5" => 'odblac',    # small o, double acute accent
    "\xD5" => 'Odblac',    # capital O, double acute accent
    "\xE0" => 'racute',    # small r, acute accent
    "\xC0" => 'Racute',    # capital R, acute accent
    "\xF8" => 'rcaron',    # small r, caron
    "\xD8" => 'Rcaron',    # capital R, caron
    "\xB6" => 'sacute',    # small s, acute accent
    "\xA6" => 'Sacute',    # capital S, acute accent
    "\xB9" => 'scaron',    # small s, caron
    "\xA9" => 'Scaron',    # capital S, caron
    "\xBA" => 'scedil',    # small s, cedilla
    "\xAA" => 'Scedil',    # capital S, cedilla
    "\xBB" => 'tcaron',    # small t, caron
    "\xAB" => 'Tcaron',    # capital T, caron
    "\xFE" => 'tcedil',    # small t, cedilla
    "\xDE" => 'Tcedil',    # capital T, cedilla
    "\xFB" => 'udblac',    # small u, double acute accent
    "\xDB" => 'Udblac',    # capital U, double acute accent
    "\xF9" => 'uring',     # small u, ring
    "\xD9" => 'Uring',     # capital U, ring
    "\xBC" => 'zacute',    # small z, acute accent
    "\xAC" => 'Zacute',    # capital Z, acute accent
    "\xBE" => 'zcaron',    # small z, caron
    "\xAE" => 'Zcaron',    # capital Z, caron
    "\xBF" => 'zdot',      # small z, dot above
    "\xAF" => 'Zdot',      # capital Z, dot above
);

# One character class covering every mapped byte, built from the table so
# the two can never drift apart.
my $latin2_re = do {
    my $class = join '', map { sprintf '\\x%02X', ord($_) } sort keys %entity_for;
    qr/[$class]/;
};

# latin2_to_entities($text)
#   Returns a copy of $text in which every mapped ISO-8859-2 byte has been
#   replaced by its "&name;" entity reference.  A single pass is enough:
#   the replacements are pure ASCII, so they can never be matched again.
sub latin2_to_entities {
    my ($text) = @_;
    $text =~ s/($latin2_re)/'&' . $entity_for{$1} . ';'/ge;
    return $text;
}

# Filter the files named on the command line (or STDIN) to STDOUT,
# line by line.
sub main {
    while (my $line = <>) {
        print latin2_to_entities($line);
    }
    return;
}

# Run as a filter only when executed directly, so the conversion routine
# can be loaded (e.g. by a test) without consuming STDIN.
main() unless caller;

1;