#!/usr/bin/perl -T
# --------------------------------------------------------
#  tinyglot.pl
# --------------------------------------------------------
#  version 1.1 (21-Dec-2004)
#  [Alessandro Ranellucci]
#  --> Released under Perl Artistic License
#
#  This script is useful to maintain translation of .strings
#  files or plist-based dict files in Cocoa applications.
#  It compares two files (the new unlocalized one and the old
#  localized one) and merges their strings into a new file.
#  New strings, that have no translation, are put at the end
#  of the file so that it's easy to complete them.
#  This script reads and generates both plain .strings files
#  (UTF-16 encoding) and XML plist files (UTF-8 encoding).
#
#  Three arguments are required:
#    old_file:     the latest localized file
#    new_file:     the newest unlocalized file
#    output_file:  where the two above are merged to
#
#  REQUIREMENTS:
#    - Unicode::String (available from CPAN)
#

use strict;
use warnings;
use Unicode::String qw(utf7 utf16 utf8 latin1);

my ($encoding, $old_file, $new_file, $output_file, $output);
my (%strings, %localized_strings, @empty_strings);

# ---- Command line ----------------------------------------------------
# Options start with "--"; the first three non-option arguments are, in
# order, old_file, new_file and output_file.
$encoding = 'plain';
foreach my $arg (@ARGV) {
    if ($arg eq '--plain') {
        $encoding = 'plain';
    }
    elsif ($arg eq '--xml') {
        $encoding = 'xml';
    }
    elsif ($arg !~ /^--/ && !defined($old_file)) {
        $old_file = $arg;
    }
    elsif ($arg !~ /^--/ && !defined($new_file)) {
        $new_file = $arg;
    }
    elsif ($arg !~ /^--/ && !defined($output_file)) {
        $output_file = $arg;
    }
    else {
        print "Unknown argument \"$arg\"\n";
        show_usage();
    }
}
show_usage() if !defined($old_file);
show_usage() if !defined($new_file);
show_usage() if !defined($output_file);

# ---- Merge -----------------------------------------------------------
# Only the keys of the new (unlocalized) file matter; its values are
# discarded and replaced by the translations found in the old file.
%strings = read_file($new_file);
foreach my $key (keys %strings) {
    $strings{$key} = '';
}
%localized_strings = read_file($old_file);

my ($count_localized, $count_empty) = (0, 0);
# Keys are sorted so that the generated file is stable between runs.
foreach my $key (sort keys %strings) {
    my $translation = $localized_strings{$key};
    # Test for "defined and non-empty" rather than plain truth, so that
    # a legitimate translation such as 0 is not treated as missing.
    if (defined($translation) && $translation ne '') {
        $strings{$key} = $translation;
        $count_localized++;
    }
    else {
        push(@empty_strings, $key);
        delete $strings{$key};
        $count_empty++;
    }
}
print "--> Total: " . ($count_localized + $count_empty) . " strings ($count_empty empty)\n";

# ---- Build the output document ---------------------------------------
# Keys and values are UTF-8 byte strings at this point (see read_file),
# so they are decoded with utf8(); literal ASCII fragments use latin1().
$output = Unicode::String->new();
if ($encoding eq 'xml') {
    $output  = latin1('<?xml version="1.0" encoding="UTF-8"?>' . "\n");
    $output .= latin1('<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">' . "\n");
    $output .= latin1('<plist version="1.0">' . "\n");
    $output .= latin1('<dict>' . "\n");
}

# Translated strings first...
foreach my $key (sort keys %strings) {
    if ($encoding eq 'plain') {
        $output .= utf8($key) . latin1(' = ') . utf8($strings{$key}) . latin1(';' . "\n");
    }
    if ($encoding eq 'xml') {
        $output .= latin1("\t<key>") . utf8(escape_xml($key)) . latin1("</key>\n");
        $output .= latin1("\t<string>") . utf8(escape_xml($strings{$key})) . latin1("</string>\n");
    }
}
# ...then the untranslated ones, grouped at the end of the file.
foreach my $key (@empty_strings) {
    if ($encoding eq 'plain') {
        $output .= utf8($key) . latin1(' = "";' . "\n");
    }
    if ($encoding eq 'xml') {
        $output .= latin1("\t<key>") . utf8(escape_xml($key)) . latin1("</key>\n");
        $output .= latin1("\t<string></string>\n");
    }
}
if ($encoding eq 'xml') {
    $output .= latin1("</dict>\n</plist>\n");
}

# ---- Write the output file -------------------------------------------
$output_file = untaint_path($output_file);
print "Output file: $output_file\n";

open(my $out, '>', $output_file)
    or die "$output_file: cannot open for writing: $!";
binmode($out);    # raw bytes: the encoding is produced explicitly below
if ($encoding eq 'plain') {
    # Big-endian byte order mark, followed by the UTF-16 text.
    print {$out} chr(0xFE) . chr(0xFF);
    print {$out} $output->utf16;
    print "Written to a plain .strings file (UTF-16).\n";
}
if ($encoding eq 'xml') {
    print {$out} $output->utf8;
    print "Written to a plist dict file (UTF-8).\n";
}
close($out) or die "$output_file: error while writing: $!";

####################################################

# untaint_path($path)
#   Validates a file path against a conservative whitelist (letters,
#   digits, '.', '&', '/', whitespace, '_' and '-') and returns the
#   untainted copy, as required under -T before opening a file for
#   writing. Dies with "<path>: invalid file path" otherwise.
sub untaint_path {
    my $path = shift;
    # \z instead of $ so that a trailing newline is not silently accepted.
    if ($path =~ m/^([a-z0-9\.\&\/\s_-]+)\z/i) {
        return $1;
    }
    die "$path: invalid file path";
}

# escape_xml($str)
#   Returns $str with the XML special characters &, < and > replaced by
#   their entities. '&' is handled first so that the entities produced
#   for '<' and '>' are not escaped a second time.
sub escape_xml {
    my $str = shift;
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    return $str;
}

# unescape_xml($str)
#   Inverse of escape_xml. '&amp;' is decoded last so that text such as
#   "&amp;lt;" yields the literal "&lt;" and not "<".
sub unescape_xml {
    my $str = shift;
    $str =~ s/&lt;/</g;
    $str =~ s/&gt;/>/g;
    $str =~ s/&amp;/&/g;
    return $str;
}

# read_file($file_path)
#   Reads a .strings file (UTF-16, or UTF-8 when it starts with a UTF-8
#   byte order mark) or an XML plist dict (detected through its
#   encoding="UTF-8" declaration) and returns a hash of key => value.
#   For .strings files keys and values keep their surrounding double
#   quotes; for plists they are the XML-unescaped element contents.
#   All returned strings are UTF-8 encoded bytes.
#   Prints a one-line summary and dies if the path is invalid, the file
#   does not exist or it cannot be read.
sub read_file {
    my $file_path = shift;
    my ($file, $u, %dict, @lines, $count_localized, $count_empty);

    $file_path = untaint_path($file_path);
    (-e $file_path) or die "$file_path: file not found";

    # Slurp the raw bytes in-process; no shell or external command needed.
    open(my $in, '<', $file_path) or die "$file_path: cannot open: $!";
    binmode($in);
    $file = do { local $/; <$in> };
    close($in);
    $file = '' if !defined($file);

    if ($file =~ /encoding="UTF-8"/i
        || substr($file, 0, 3) eq chr(0xEF) . chr(0xBB) . chr(0xBF)) {
        $u = utf8($file);
    }
    else {
        $u = utf16($file);
    }
    $file = $u->utf8;

    # Drop a leading byte order mark (now UTF-8 encoded): it would
    # otherwise prevent the first line from matching the patterns below.
    $file =~ s/\A\xEF\xBB\xBF//;

    # Put each <key>/<string> pair of a plist on a single line.
    $file =~ s{</key>\s+<string>}{</key><string>}gi;

    @lines = split(/[\n\r]/, $file);
    $count_localized = $count_empty = 0;
    foreach my $line (@lines) {
        # "key";
        $line =~ m/^\s*("(?:\\["\\]|[^"])*?")\s*;\s*$/
            && ($dict{$1} = '');
        # "key" = "value";
        $line =~ m/^\s*("(?:\\["\\]|[^"])*?")\s*=\s*("(?:\\["\\]|[^"])*?")\s*;\s*$/
            && ($dict{$1} = $2);
        # key = "value";   (unquoted key)
        $line =~ m/^\s*([^"=\s]+)\s*=\s*("(?:\\["\\]|[^"])*?")\s*;\s*$/
            && ($dict{$1} = $2);
        # <key>key</key><string>value</string>
        $line =~ m/<key>([^<]+)<\/key><string>([^<]*)<\/string>/
            && ($dict{unescape_xml($1)} = unescape_xml($2));
    }

    foreach my $key (keys %dict) {
        $dict{$key} eq '' ? $count_empty++ : $count_localized++;
    }
    print "$file_path: " . ($count_localized + $count_empty) . " strings found ($count_empty empty)\n";
    return %dict;
}

# show_usage()
#   Prints the command-line synopsis and terminates the script.
sub show_usage {
    print "Usage: tinyglot.pl [ --xml | --plain ] old_file new_file output_file\n";
    exit;
}