#!/usr/bin/perl
#
# Convert a Blogger Atom export into Movable Type import format.
#
# Usage: perl <this script> <blogger-export.xml>  > mt-import.txt
#
# The converted posts (with their comments) are written to STDOUT, progress
# and parse errors go to STDERR, and a report of characters that could not
# be converted to ASCII is written to error.txt in the current directory.

use strict;
use warnings;

# use module
# XML::Simple itself is loaded inside main() (see there) so that the helper
# subs below remain usable on their own.
use Data::Dumper;

# Handle for error.txt; opened by main().  While it is undefined, uncode()
# reports to STDERR instead.
my $err_fh;

# The 'alternate' link (title and URL data) of the post currently being
# written; uncode() uses its href to say where bad characters were found.
my $current_link;

# Characters that Blogger data is known to contain, and their ASCII-safe
# replacements.  The replacements MUST be plain ASCII: uncode() strips every
# remaining non-ASCII character right after applying this table.
my @REPLACEMENT_FOR = (
    [ "\x{2013}", '&ndash;'  ],    # E2-80-93
    [ "\x{2014}", '&mdash;'  ],    # E2-80-94
    [ "\x{2018}", '&lsquo;'  ],    # E2-80-98
    [ "\x{2019}", '&rsquo;'  ],    # E2-80-99
    [ "\x{201C}", '&ldquo;'  ],    # E2-80-9C
    [ "\x{201D}", '&rdquo;'  ],    # E2-80-9D
    [ "\x{2026}", '...'      ],    # E2-80-A6
    [ "\x{2028}", ''         ],    # E2-80-A8 (line separator)
    [ "\x{2122}", '&trade;'  ],    # E2-84-A2
    [ "\x{FB01}", 'fi'       ],    # EF-AC-81
    [ "\x{FB02}", 'fl'       ],    # EF-AC-82
    [ "\x85",     '...'      ],
    [ "\xa3",     '&pound;'  ],
    [ "\xa9",     '&copy;'   ],
    [ "\xe0",     '&agrave;' ],
    [ "\xe7",     '&ccedil;' ],
    [ "\xe9",     '&eacute;' ],
    [ "\xea",     '&ecirc;'  ],
    [ "\xf1",     '&ntilde;' ],
    [ "\xf3",     '&oacute;' ],
    [ "\xf6",     '&ouml;'   ],
    [ "\xfc",     '&uuml;'   ],
);

# Walk $node through the given hash keys and return the scalar found there.
# Returns '' when a step is missing, or when the final value is undefined or
# is itself a reference, so callers can always print the result safely.
sub _field {
    my ($node, @path) = @_;
    foreach my $step (@path) {
        return '' unless ref($node) eq 'HASH' && exists $node->{$step};
        $node = $node->{$step};
    }
    return (defined($node) && !ref($node)) ? $node : '';
}

# Return the elements of an array reference, a one-element list for any other
# defined value, or the empty list for undef.  A repeated XML element arrives
# as an array reference but a lone one does not, so both are handled alike.
sub _as_list {
    my ($value) = @_;
    return () unless defined $value;
    return ref($value) eq 'ARRAY' ? @{$value} : ($value);
}

# Read the export named by $ARGV[0] and print every post, followed by its
# comments, to STDOUT.  Returns 0, which is used as the process exit status.
sub main {
    # Loaded at run time rather than with "use" so that loading this file
    # for its helper subs does not require XML::Simple.
    require XML::Simple;

    if (!open($err_fh, '>:encoding(UTF-8)', 'error.txt')) {
        print STDERR "WARNING - cannot write error.txt ($!); reporting to STDERR\n";
        $err_fh = undef;
    }

    # create object
    my $xml = XML::Simple->new;

    # read XML file
    my $data = $xml->XMLin($ARGV[0]);

    my $entry_for = ref($data) eq 'HASH' ? $data->{'entry'} : undef;
    if (ref($entry_for) ne 'HASH') {
        print STDERR "ERROR - no usable 'entry' data found in input\n";
        $entry_for = {};
    }

    # posts and comments both have ".post-" in their key
    my @post_keys = grep { /\.post-/ && ref($entry_for->{$_}) eq 'HASH' }
                    sort keys %{$entry_for};

    # deal with the comments first
    my %comment;         # key of the parent post => text of all its comments
    my $comments = 0;
    foreach my $key (@post_keys) {
        my $hr = $entry_for->{$key};

        # only comments have a "thr:in-reply-to" element
        next if !exists($hr->{'thr:in-reply-to'});
        $comments += 1;

        my $parent = _field($hr, 'thr:in-reply-to', 'ref');
        my $d      = fixDate($hr->{'published'});
        $d = '' unless defined $d;
        my $name  = _field($hr, 'author',  'name');
        my $email = _field($hr, 'author',  'email');
        my $url   = _field($hr, 'author',  'uri');
        my $body  = _field($hr, 'content', 'content');

        # NOTE(review): the start of this heredoc ("COMMENT:" and the
        # "AUTHOR: " label) was lost from the source and is reconstructed
        # from the Movable Type import format - confirm against a known-good
        # copy of the script.
        $comment{$parent} .= <<"EOF";
COMMENT:
AUTHOR: $name
DATE: $d
EMAIL: $email
URL: $url
$body
-----
EOF
    }

    my $entries = 0;
    foreach my $key (@post_keys) {
        my $hr = $entry_for->{$key};

        # already processed the comments
        next if exists($hr->{'thr:in-reply-to'});
        $entries += 1;

        # locate the title (and URL) of the blog post.  Start from undef for
        # every post: otherwise a post without an 'alternate' link would
        # silently reuse the link (and so the BASENAME) of the previous post.
        $current_link = undef;
        foreach my $link (_as_list($hr->{'link'})) {
            if (_field($link, 'rel') eq 'alternate') {
                $current_link = $link;
                last;
            }
        }
        if (!defined($current_link)) {
            print STDERR "ERROR - no title for $key (skipped)\n";
            next;
        }

        print "AUTHOR: ", getName(_field($hr, 'author', 'name')), "\n";
        print "DATE: ", fixDate($hr->{'published'}), "\n";
        print "TITLE: ", _field($hr, 'title', 'content'), "\n";
        print "BASENAME: ", getBase($current_link->{'href'}), "\n";

        # draft or published
        my $is_draft = _field($hr, 'app:control', 'app:draft') eq 'yes';
        print "STATUS: ", ($is_draft ? "Draft" : "Publish"), "\n";

        # miscellany
        print "ALLOW COMMENTS: 1\n";
        print "CONVERT BREAKS: 0\n";

        # get the tags, if any; a tag containing a space is quoted
        my @tags;
        foreach my $category (_as_list($hr->{'category'})) {
            next if _field($category, 'scheme') ne 'http://www.blogger.com/atom/ns#';
            my $term = _field($category, 'term');
            next if $term eq '';
            push @tags, ($term =~ / /) ? '"' . $term . '"' : $term;
        }
        print "TAGS: ", join(',', @tags), "\n" if @tags;

        print "-----\n";
        print "BODY:\n";
        print uncode(_field($hr, 'content', 'content')), "\n";
        print "-----\n";
        if (exists($comment{$key})) {
            print uncode($comment{$key}), "\n";
        }
        print "--------\n";
    }

    print STDERR "$entries entries, $comments comments.\n";

    if (defined $err_fh) {
        close($err_fh)
            or print STDERR "WARNING - error closing error.txt: $!\n";
    }
    return 0;
}

# translate Blogger date format (YYYY-MM-DDThh:mm:ss) into MT date format
# (MM/DD/YYYY hh:mm:ss), e.g. fixing 2008-01-22T16:04:00.000-08:00.
# Anything after the seconds is ignored.  On a parse failure an error is
# printed to STDERR and nothing (undef / the empty list) is returned.
sub fixDate {
    my $d = shift;
    $d = '' unless defined $d;

    if ($d =~ /^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d:\d\d:\d\d)/) {
        return "$2/$3/$1 $4";
    }

    print STDERR "Error parsing $d\n";
    return;
}

# translate the Blogger file name into an MT basename,
# e.g. http://photo.who2.com/blog/yyyy/mm/filename-with-dashes.ext gives
# "filename-with-dashes".  Both http and https URLs are accepted.  On a parse
# failure an error is printed to STDERR and nothing is returned.
sub getBase {
    my $href = shift;
    $href = '' unless defined $href;

    if ($href =~ m{^https?://.*/blog/\d\d\d\d/\d\d/(.+)\.(?:txt|html)$}) {
        return $1;
    }

    print STDERR "Error parsing $href\n";
    return;
}

# Take care of any strange (Unicode) characters in the Blogger data.
# Known characters are replaced using @REPLACEMENT_FOR; any other character
# outside printable ASCII, newline and carriage return is removed, and each
# text from which something was removed is reported in the error log together
# with the href of the current post.  Returns the cleaned text.
sub uncode {
    my $t1 = shift;
    $t1 = '' unless defined $t1;

    # that pesky Unicode!
    foreach my $pair (@REPLACEMENT_FOR) {
        my ($char, $replacement) = @{$pair};
        $t1 =~ s/$char/$replacement/g;
    }

    my $t2 = $t1;
    $t1 =~ s/[^ -~\n\r]//g;
    if ($t1 ne $t2) {
        # keep only the characters that were just removed, for the report
        my $t3 = $t2;
        $t3 =~ s/[ -~\n\r]//g;
        my $out = defined($err_fh) ? $err_fh : \*STDERR;
        print {$out} _field($current_link, 'href'), " contains ",
            length($t3), " non-ASCII characters ($t3).\n";
    }
    return $t1;
}

# map Blogger display names to MT usernames.  An unknown name is reported on
# STDERR and mapped to "fritz".
sub getName {
    my $n = shift;
    $n = '' unless defined $n;

    return "fritz" if ($n =~ /Mr. Holznagel/);
    return "paul"  if ($n =~ /Mr. Hehn/);
    return "adam"  if ($n =~ /Adam/);

    print STDERR "ERROR - Unknown author($n)\n";
    return "fritz";
}

# Run the conversion only when executed as a script, not when this file is
# loaded with require/do.
exit(main()) unless caller;

1;