#!/usr/bin/perl
#
# Script to save the S@H Stats page
#
# Copyright (C) Chris D. Halversion, 2001
#
#
# This code is put into the public domain.  It comes with no warranty,
# use at your own risk
#
# Some parts are thanks to Dominique Pelle and his RC5 stats generation
# scripts at http://dominique.pelle.free.fr/  I used some gnuplot settings
# to help mine out a bit.
#
# What the script does, in order:
#   1. Downloads the user stats page and stores it under $basedir/.pages.
#   2. Scrapes the unit count, CPU time, percentile and rank out of the
#      stored page and appends them as one TSV line to $basedir/units.log.
#   3. Derives per-day unit counts from the midnight (HHMM == 0000) samples
#      in that log.
#   4. Pipes two command scripts to gnuplot to draw units.png and perday.png
#      in $basedir.
#

use strict;
use warnings;

use LWP::Simple;
use POSIX;
use HTML::TokeParser;
use Time::Local;
use File::Temp qw(tempfile);
use vars qw( %month %rmonth );

$| = 1;

# Set to 0 to skip the download/scrape/log step and only redraw the plots.
my $get_stats = 1;

# Change this to be where the base level directory is
my $basedir;
#if ((uname)[1] eq "web1") {
$basedir = "/home/cdh/www/seti";
#} else {
#    $basedir = "/home/cdh/public_html/seti";
#}

# Needed because gnuplot is dynamically linked and when I run this via cron
# gnuplot can't find some libraries.
$ENV{'LD_LIBRARY_PATH'} = "/usr/local/lib:/usr/lib:/lib";

# Month abbreviation <-> month number lookups used by the date helpers.
my %month = qw(Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec 12);
my %rmonth = reverse %month;

# Used if need to regenerate the stats by hand
#my $html = $ARGV[0];
#my $ltime;
#(my $cmd = $html) =~ s:.*/::;
#($ltime = $cmd) =~ s/(\d+).html/$1/;

# Timestamp of this run, YYYYMMDDHHMM; it is both the log key and the name
# of the stored page.
my $ltime = strftime("%Y%m%d%H%M", localtime(time));

my $logfile = "$basedir/units.log";
my $html    = "$basedir/.pages/$ltime.html";

my $secs_per_min  = 60;
my $secs_per_hour = ($secs_per_min * 60);
my $secs_per_day  = ($secs_per_hour * 24);

# Let's go directly to the dir (makes it easier for gnuplot).  The plot
# commands below use relative file names, so failing here must be fatal.
chdir $basedir or die "Cannot chdir to $basedir: $!\n";

# Number of failed download attempts so far.
my $loops = 0;

# We'll try it five times as sometimes the server doesn't respond
my $max_attempts = 5;

if ($get_stats) {
    while (is_error(get_seti($html))) {
        # $loops counts failures before this one, so the attempt that just
        # failed is number $loops + 1.
        if ($loops >= $max_attempts - 1) {
            print "Error in getting HTML.\n";
            print "Exiting.\n";
            exit(1);
        }
        sleep 60;
        $loops++;
    }

    my ($num, $calc_secs, $percentage, $ranking) = _parse_stats_page($html);

    # Refuse to append a record without a unit count; the count is the one
    # field the plots are built from.
    die "Could not find the number of results received in $html\n"
        unless defined $num && $num ne '';

    # Save them to a TSV file
    open(my $log_fh, '>>', $logfile) or die "Cannot open log: $!\n";
    print {$log_fh} "$ltime\t$num\t$calc_secs\t$percentage\t$ranking\n"
        or die "Cannot write to file: $!\n";
    close($log_fh) or die "Cannot write to file: $!\n";
}

my ($t_units_per_day, $t_total_units) = generate_stats($logfile);
my %units_per_day = %$t_units_per_day;
my $total_units   = $$t_total_units;

die "No midnight samples found in $logfile; nothing to plot\n"
    unless %units_per_day;

my $best_day   = 0;
my $best_units = 0;
my $num_days   = 0;
my $first_day  = 0;
my $last_day   = 0;

# Scratch data file for the per-day plot; removed automatically at exit,
# which happens only after gnuplot has finished reading it.
my ($per_day_fh, $per_day_file) =
    tempfile("seti_perday_XXXXXX", TMPDIR => 1, UNLINK => 1);
#print "Per day file: $per_day_file\n";

my $seen_first = 0;
foreach my $day (sort keys %units_per_day) {
    print {$per_day_fh} "$day\t$units_per_day{$day}\n"
        or die "Cannot write to $per_day_file: $!\n";

    # The first day is written to the data file but is not counted in
    # $num_days and is not a candidate for the best day.
    if (!$seen_first) {
        $seen_first = 1;
        $first_day  = $day;
        $last_day   = $day;
        next;
    }

    if ($units_per_day{$day} > $best_units) {
        $best_units = $units_per_day{$day};
        $best_day   = $day;
    }
    $last_day = $day;
    $num_days++;
}
close($per_day_fh) or die "Cannot write to $per_day_file: $!\n";

my $all_days = (date_2_epoch($last_day) - date_2_epoch($first_day)) / $secs_per_day;

# A rate needs a non-zero time span to divide by.
die "Need midnight samples from at least two different days in $logfile\n"
    unless $all_days > 0;

my $overall_rate = $total_units / $all_days;

# $best_day stays 0 when no day after the first recorded any units.
$best_day = $best_day ? epoch_2_date(date_2_epoch($best_day)) : "none";

my $yrange_units = _y_range($total_units);
my $yrange_rate  = _y_range($best_units);

# Get the xrange to be one day before the first day and one day after the
# last.  Gnuplot's auto-generation leaves a lot on each side.
my $xrange = "[\"" . _shift_day($first_day, -1) . "\":\""
           . _shift_day($last_day, 1) . "\"]";

# my @args;
# push @args, "/usr/local/bin/gnuplot";
# push @args, "$basedir/plots.cmd";
# system @args;
# if (($? & 256) != 0) {
#     print "Error running gnuplot for the units\n";
#     exit(1);
# }

my $gnuplot = "/usr/local/bin/gnuplot";

# Cumulative unit count over time, straight from the log.
_run_gnuplot($gnuplot, qq{
set title "Seti\@Home Work Unit Graph for chris\@halverson.org\\nBest day: $best_day ($best_units units sent)\\nNumber of days with results: $num_days Total number of days: $all_days";
set xdata time;
set timefmt "%Y%m%d%H%M";
set format x "%d-%b-%Y";
set nokey;
set grid;
set xtics rotate;
set xrange $xrange;
set yrange $yrange_units;
set terminal png color;
set ylabel "Number of Units";
set output "units.png";
plot "units.log" using 1:2 with lines;
});

$overall_rate = sprintf("%.2f", $overall_rate);
$first_day    = epoch_2_date(date_2_epoch($first_day));
$last_day     = epoch_2_date(date_2_epoch($last_day));

# Units per day, with the overall average drawn as a constant line.
_run_gnuplot($gnuplot, qq{
set title "Seti\@Home Rate History for chris\@halverson.org\\nFirst unit: $first_day, Last unit: $last_day\\nOveral average: $overall_rate units/day";
set xdata time;
set timefmt "%Y%m%d";
set format x "%d-%b-%Y";
set grid;
set xrange $xrange;
set yrange $yrange_rate;
set xtics rotate;
set terminal png color;
set ylabel "Units/day";
set output "perday.png";
plot "$per_day_file" using 1:2 title "daily units" with lines linetype 3, $overall_rate title "overall rate" linetype 2;
});

# This is just for some debugging info, just curious to see how many
# times it takes > 1 attempt
if ($loops > 0) {
    # Since it starts at 0, let's increment now so we don't have to
    # mentally do it
    $loops++;
    print "Number of attempts needed: $loops\n";
}

exit(0);

#
# get_seti($savefile)
#
# Downloads the stats page into $savefile and returns the HTTP status code
# reported by getstore().
#
sub get_seti {
    my $savefile = shift;

    # Let's store it just in case.  That way we can recreate things if necessary
    #my $url = "http://iosef.ssl.berkeley.edu/cgi-bin/cgi?email=chris%40halverson.org&cmd=user_stats_new";
    #my $url = 'http://setiathome.ssl.berkeley.edu/cgi-bin/cgi?email=chris@halverson.org&cmd=user_stats_new';
    my $url = 'http://setiathome.ssl.berkeley.edu/fcgi-bin/fcgi?email=chris@halverson.org&cmd=user_stats_new';

    my $rc = getstore($url, $savefile);

    return($rc);
}

#
# _parse_stats_page($page)
#
# Scans the stored HTML file $page and returns the list
#   ($results_received, $cpu_seconds, $percentage, $ranking)
# Each value is the first non-empty text that follows its label.
# $cpu_seconds is formatted with one decimal.  $results_received is undef
# when its label is never found; $percentage and $ranking fall back to the
# empty string and the CPU time to 0.0.
#
# The results line looks like:
#    Results Received159
# We'll just rip it out
#
sub _parse_stats_page {
    my $page = shift;

    my $p = HTML::TokeParser->new($page)
        or die "Cannot open $page: $!\n";

    my $num;
    my $percentage = '';
    my $ranking    = '';
    my ($hours, $mins, $secs) = (0, 0, 0);

    # Set when a label has been seen and its value is still to come.
    my $in_results_received = 0;
    my $in_total_cpu        = 0;
    my $in_more_units_than  = 0;
    my $in_your_rank        = 0;

    #while (my $token = $p->get_tag("td")) {
    while (my $token = $p->get_tag()) {
        my $text = $p->get_trimmed_text();

        # Must be filtered first: it would otherwise match the
        # "Results Received" label test below.
        next if $text =~ /Average results received per day/i;

        if ($text =~ /Results Received/i) {
            $in_results_received = 1;
            next;
        } elsif ($text =~ /Average CPU Time/i) {
            $in_total_cpu = 1;
            next;
        } elsif ($text =~ /more work units than/i) {
            $in_more_units_than = 1;
            next;
        } elsif ($text =~ /total users is/i) {
            $in_your_rank = 1;
            next;
        }

        # Values are taken from the first non-empty text after a label.
        next if $text eq "";

        # Captures are only used when the match succeeded, so a value that
        # does not look as expected cannot pick up a stale $1.
        if ($in_results_received) {
            $num = $text;
            $in_results_received = 0;
        } elsif ($in_total_cpu) {
            if ($text =~ /(\d+) hr (\d+) min (\d+\.?\d*) sec/) {
                ($hours, $mins, $secs) = ($1, $2, $3);
            }
            $in_total_cpu = 0;
        } elsif ($in_more_units_than) {
            if ($text =~ /(\d+\.?\d*)\%.*/i) {
                $percentage = $1;
            }
            $in_more_units_than = 0;
        } elsif ($in_your_rank) {
            if ($text =~ /(\d+).*/i) {
                $ranking = $1;
            }
            $in_your_rank = 0;
        }
    }

    my $t_calc_secs = $hours * $secs_per_hour
                    + $mins  * $secs_per_min
                    + $secs;

    # Make them all floats that have at least one decimal
    my $calc_secs = sprintf("%.1f", $t_calc_secs);

    return ($num, $calc_secs, $percentage, $ranking);
}

#
# _run_gnuplot($gnuplot, $commands)
#
# Starts the gnuplot binary $gnuplot and feeds it $commands on stdin.
# Dies if the process cannot be started; warns if it does not exit cleanly.
#
sub _run_gnuplot {
    my ($program, $commands) = @_;

    open(my $plot, '|-', $program) or die "Can't start gnuplot: $!";
    print {$plot} $commands;

    # close() on a pipe waits for the child and reports its exit status.
    close($plot) or warn "gnuplot did not exit cleanly (status $?)\n";

    return;
}

#
# _y_range($max)
#
# Returns a gnuplot range "[0:N]" with N five percent above $max.  A $max
# of zero is treated as 1 so the range is never empty.
#
sub _y_range {
    my $max = shift;

    return "[0:" . (1.05 * ($max || 1)) . "]";
}

#
# _shift_day($date, $days)
#
# Returns the YYYYMMDD date that lies $days days after $date (before, when
# $days is negative).  $date is YYYYMMDD, optionally followed by HHMM.
#
sub _shift_day {
    my ($date, $days) = @_;

    return date_2_string(epoch_2_date(date_2_epoch($date) + $days * $secs_per_day));
}

#
# $time = timelocal($sec,$min,$hours,$mday,$mon,$year);
# $time = timegm($sec,$min,$hours,$mday,$mon,$year);
#
# date_2_epoch($date)
#
# Returns the epoch seconds of midnight GMT on the day named by the first
# eight characters (YYYYMMDD) of $date; any HHMM suffix is ignored.
#
sub date_2_epoch {
    # Comes in as YYYYMMDDHHMM
    my $in_date = shift;

    my $t_year  = substr($in_date, 0, 4);
    my $t_month = substr($in_date, 4, 2);
    my $t_day   = substr($in_date, 6, 2);

    return(timegm(0, 0, 0, $t_day, $t_month - 1, $t_year));
}

#
# epoch_2_date($epoch)
#
# Returns $epoch (interpreted in GMT) as "DD-Mon-YYYY", e.g. "30-Jan-2001".
#
sub epoch_2_date {
    my $epoch = shift;

    # we could use the '%d-%B-%Y' date format but it depends on the locale
    # settings.  %d-%m-%Y does not depend on the locale settings.
    # A lexical is used so the caller's $_ is left alone.
    my $date = strftime('%d-%m-%Y', gmtime($epoch));
    $date =~ s/-0?(\d+)-/-$rmonth{$1}-/;

    return $date;
}

#
# date_2_string($date)
#
# Converts "DD-Mon-YYYY" into "YYYYMMDD".
#
sub date_2_string {
    # This comes in the form of "30-Jan-2001"
    my $in_date = shift;

    my @a = split(/-/, $in_date);

    my $ret = $a[2] . sprintf("%02d", $month{$a[1]}) . sprintf("%02d", $a[0]);

    return($ret);
}

#
# generate_stats($logfile)
#
# Reads the TSV log and returns two references:
#   \%units_per_day  - YYYYMMDD => units gained since the previous midnight
#                      sample that had a non-zero total; 0 for days with no
#                      midnight sample or a zero total.  Covers every day
#                      from the first to the last midnight sample.
#   \$total_units    - the unit count of the last usable line in the log.
# The hash is empty when the log contains no midnight samples.
#
sub generate_stats {
    my $logfile = shift;

    my %units_per_day;
    my %total_per_day;
    my $total_num_units = 0;

    open(my $log_fh, '<', $logfile) or die "Cannot open $logfile: $!\n";
    while (my $line = <$log_fh>) {
        chomp $line;
        my ($date, $units) = split(/\t/, $line);

        # Skip blank or truncated lines instead of letting them reset the
        # running total.
        next unless defined $date && defined $units && $units ne '';

        if ($date =~ /(\d{8})0000/) {
            # 200111120000 == midnight
            $total_per_day{$1} = $units;
        }
        $total_num_units = $units;
    }
    close($log_fh);

    # Do things that are "day" based in this loop
    my @days = sort keys %total_per_day;
    return(\%units_per_day, \$total_num_units) unless @days;

    my $first_day      = $days[0];
    my $last_day       = $days[$#days];
    my $last_num_units = 0;

    # Walk every calendar day in the range, not just the sampled ones, so
    # gaps show up as zero-unit days.  YYYYMMDD strings order correctly
    # under string comparison.
    for (my $day = $first_day; $day le $last_day; $day = _shift_day($day, 1)) {
        if (!$total_per_day{$day}) {
            $units_per_day{$day} = 0;
        } else {
            $units_per_day{$day} = ($total_per_day{$day} - $last_num_units);
            $last_num_units      = $total_per_day{$day};
        }
    }

    return(\%units_per_day, \$total_num_units);
}