#!/usr/bin/perl
# Replace ISO-8859-2 (Latin-2) letters with the corresponding SGML entity
# references from the ISO "Added Latin 2" (ISOlat2) entity set.
#
# usage: Lat2ent.pl [FILE...] > entities.sgml
#
# Input is taken from the files named on the command line, or from standard
# input when none are given; the converted text is written to standard
# output.  Characters that are not in the table pass through unchanged.
#
# The table is keyed by hex escapes rather than literal 8-bit characters so
# the script behaves the same whatever encoding this file is saved in, and
# every replacement is a plain-ASCII entity reference.
# NOTE(review): assumes input lines are read as raw bytes (no -C switch or
# PERL_UNICODE decoding layer in effect) -- confirm for your environment.

use strict;
use warnings;

# ISO-8859-2 byte => ISOlat2 entity reference.
my %entity_for = (
    "\xE3" => '&abreve;',    # small a, breve
    "\xC3" => '&Abreve;',    # capital A, breve
    "\xB1" => '&aogon;',     # small a, ogonek
    "\xA1" => '&Aogon;',     # capital A, ogonek
    "\xE6" => '&cacute;',    # small c, acute accent
    "\xC6" => '&Cacute;',    # capital C, acute accent
    "\xE8" => '&ccaron;',    # small c, caron
    "\xC8" => '&Ccaron;',    # capital C, caron
    "\xEF" => '&dcaron;',    # small d, caron
    "\xCF" => '&Dcaron;',    # capital D, caron
    "\xF0" => '&dstrok;',    # small d, stroke
    "\xD0" => '&Dstrok;',    # capital D, stroke
    "\xEC" => '&ecaron;',    # small e, caron
    "\xCC" => '&Ecaron;',    # capital E, caron
    "\xEA" => '&eogon;',     # small e, ogonek
    "\xCA" => '&Eogon;',     # capital E, ogonek
    "\xE5" => '&lacute;',    # small l, acute accent
    "\xC5" => '&Lacute;',    # capital L, acute accent
    "\xB5" => '&lcaron;',    # small l, caron
    "\xA5" => '&Lcaron;',    # capital L, caron
    "\xB3" => '&lstrok;',    # small l, stroke
    "\xA3" => '&Lstrok;',    # capital L, stroke
    "\xF1" => '&nacute;',    # small n, acute accent
    "\xD1" => '&Nacute;',    # capital N, acute accent
    "\xF2" => '&ncaron;',    # small n, caron
    "\xD2" => '&Ncaron;',    # capital N, caron
    "\xF5" => '&odblac;',    # small o, double acute accent
    "\xD5" => '&Odblac;',    # capital O, double acute accent
    "\xE0" => '&racute;',    # small r, acute accent
    "\xC0" => '&Racute;',    # capital R, acute accent
    "\xF8" => '&rcaron;',    # small r, caron
    "\xD8" => '&Rcaron;',    # capital R, caron
    "\xB6" => '&sacute;',    # small s, acute accent
    "\xA6" => '&Sacute;',    # capital S, acute accent
    "\xB9" => '&scaron;',    # small s, caron
    "\xA9" => '&Scaron;',    # capital S, caron
    "\xBA" => '&scedil;',    # small s, cedilla
    "\xAA" => '&Scedil;',    # capital S, cedilla
    "\xBB" => '&tcaron;',    # small t, caron
    "\xAB" => '&Tcaron;',    # capital T, caron
    "\xFE" => '&tcedil;',    # small t, cedilla
    "\xDE" => '&Tcedil;',    # capital T, cedilla
    "\xFB" => '&udblac;',    # small u, double acute accent
    "\xDB" => '&Udblac;',    # capital U, double acute accent
    "\xF9" => '&uring;',     # small u, ring
    "\xD9" => '&Uring;',     # capital U, ring
    "\xBC" => '&zacute;',    # small z, acute accent
    "\xAC" => '&Zacute;',    # capital Z, acute accent
    "\xBE" => '&zcaron;',    # small z, caron
    "\xAE" => '&Zcaron;',    # capital Z, caron
    "\xBF" => '&zdot;',      # small z, dot above
    "\xAF" => '&Zdot;',      # capital Z, dot above
);

# The remaining ISOlat2 entities are deliberately not mapped: their
# characters have no code point in ISO-8859-2, so no input byte can stand
# for them:
#   amacr Amacr ccirc Ccirc cdot Cdot edot Edot emacr Emacr gacute gbreve
#   Gbreve Gcedil gcirc Gcirc gdot Gdot hcirc Hcirc hstrok Hstrok Idot
#   Imacr imacr ijlig IJlig inodot iogon Iogon itilde Itilde jcirc Jcirc
#   kcedil Kcedil kgreen lcedil Lcedil lmidot Lmidot eng ENG napos ncedil
#   Ncedil Omacr omacr oelig OElig rcedil Rcedil scirc Scirc tstrok Tstrok
#   ubreve Ubreve umacr Umacr uogon Uogon utilde Utilde wcirc Wcirc ycirc
#   Ycirc Yuml

while (my $line = <>) {
    # One pass over every high-half byte; bytes without a table entry
    # (e.g. a-acute, which ISOlat2 does not cover) are put back untouched.
    $line =~ s/([\xA1-\xFE])/exists $entity_for{$1} ? $entity_for{$1} : $1/ge;
    print $line;
}