bucky2/rest/topsy-nndb-web.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

#!/usr/bin/perl
use lib "../lib";
use Rest::Topsy;
use Data::Dumper;
# Main script body: load the task list, keep the tasks that carry data,
# print a one-line progress summary, and write one HTML report per sort order.
my $topsy = Rest::Topsy->new;

my $tasks = load_task_history($topsy);

# Collect every task whose date is not -1 (presumably -1 marks a task that has
# not been pulled yet -- confirm against whatever writes tasks.txt), expanding
# the "K"/"M" suffixes of the count columns so they compare numerically.
# NOTE(review): the expansion is textual, so a value such as "1.5K" would
# become "1.5000"; confirm the counts never contain a decimal point.
my $matches = [];
foreach my $task (@$tasks)
	{
	next if $task->{'date'} == -1;
	foreach my $k (qw[all month week day hour])
		{
		$task->{$k} =~ s/K$/000/;
		$task->{$k} =~ s/M$/000000/;
		}
	push @$matches, $task;
	}

# These three are package globals because header() interpolates them.
our $task_count = scalar(@$tasks);
our $match_count = scalar(@$matches);
# Guard the division: an empty task file must not abort the run.
our $percent = sprintf "%0.1f%%", $task_count ? 100 * ($match_count/$task_count) : 0;

print "Pulled $match_count/$task_count ($percent complete)\n";

# Compares two task hashes by descending count, trying each key in turn and
# moving on to the next key only on a tie.  Returns 0 when every key ties so
# the comparator always hands sort() a defined value.
my $by_counts = sub
	{
	my ($x, $y, @keys) = @_;
	foreach my $key (@keys)
		{
		my $order = $y->{$key} <=> $x->{$key};
		return $order if $order;
		}
	return 0;
	};

print_report( "name",	[(sort name_sort @$matches)] );

# One report per period.  The tie-break order is the period list rotated so
# that the report's own period comes first (all: all month week day hour;
# month: month week day hour all; and so on).
my @periods = qw[all month week day hour];
foreach my $i (0 .. $#periods)
	{
	my @priority = (@periods[$i .. $#periods], @periods[0 .. $i - 1]);
	print_report( $periods[$i],	[(sort { $by_counts->($a, $b, @priority) } @$matches)] );
	}
# print_report($title, $matches)
#
# Builds one HTML report page and passes it to $topsy->write_data with the
# path "../tmp/nndb/$title.html".
#
#   $title    name of the report; also the hash key a row must have a true
#             value for in order to be listed, and the column that header()
#             marks as current.
#   $matches  array ref of task hashes, already in display order.
#
# Each row shows the five count columns, the name linked to an NNDB URL
# built from the id, and a link to a Topsy search for the name.
sub print_report
	{
	my ($title, $matches) = @_;
	my $out = header($title);
	foreach my $p (@$matches)
		{
		# Skip rows with nothing to show for this report's own column.
		next unless $p->{$title};
		$out .= "<tr>";
		$out .= "<td align='right'>". $p->{$_} ."</td>" for qw[all month week day hour];

		my $nndb_url = sprintf("http://www.nndb.com/people/%03d/%09d/", $p->{'id'} % 997, $p->{'id'});

		# Percent-encode the name before putting it in the query string:
		# names contain spaces and commas, which are not valid raw in a URL.
		# Character strings are turned into UTF-8 bytes first so that every
		# escape is a single byte.
		my $query = $p->{'name'};
		utf8::encode($query) if utf8::is_utf8($query);
		$query =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
		my $topsy_url = "http://topsy.com/search?q=" . $query;

		# NOTE(review): the displayed name is emitted as-is; if names can hold
		# raw "<" or "&" it should be HTML-escaped -- confirm how they are stored.
		$out .= "<td align='left'><a href=\"$nndb_url\">". $p->{'name'} ."</a></td>";
		$out .= "<td align='left'><a href=\"$topsy_url\">(topsy)</a></td>";
		$out .= "</tr>";
		}
	$out .= footer();
	$topsy->write_data("../tmp/nndb/".$title.".html", $out);
	}
# header($current)
#
# Returns the opening HTML of a report page: the style sheet, the
# "Pulled x/y" summary line (read from the package globals $match_count,
# $task_count and $percent) and the table's heading row.
#
#   $current  name of the report being rendered; its column heading is shown
#             in bold, every other period heading links to "<period>.html".
sub header
	{
	my ($current) = @_;
	my $out = <<__HEADER__;
<html>
<head>
<style type="text/css">
<!--
body {font-size: 13px; }
th {font-size: 13px; text-align: left;}
td {font-size: 13px; }
td a {font-size: 16px; }
td.topsy a {font-size: 16px;}
td.topsy a {display: none;}
td.bold { font-weight: bold; }
-->
</style>
</head>
<body>
Pulled $match_count/$task_count ($percent complete)<br>
<table border=0 cellpadding=0 cellspacing=5>
<tbody>
<tr>
<th>
__HEADER__
	$out .= join "</th><th>", map { $current eq $_ ? "<b>$_</b>" : "<a href='$_.html'>$_</a>" } qw[all month week day hour];
	# The "name" cell closes itself, so only the row remains to be closed.
	$out .= "</th><th>name</th></tr>";
	return $out;
	}
# footer()
#
# Returns the closing tags for the elements a report page leaves open,
# innermost first.
sub footer
	{
	my @open_elements = qw[tbody table body html];
	return join "", map { "</$_>" } @open_elements;
	}
# nice_date($date)
#
# Returns $date with one leading underscore and a trailing ".html" removed.
sub nice_date
	{
	my $date = shift;
	for ($date)
		{
		s/^_//;
		s/\.html$//;
		}
	return $date;
	}
# load_task_history($self)
#
# Reads '../tmp/nndb/tasks.txt' through $self->read_data and parses it as
# one tab-separated record per line.
#
#   $self  object providing read_data(path).
#
# Returns an array ref of hash refs.  Fields are assigned, in order, to the
# keys id, name, date, doppelganger, all, month, week, day, hour; a record
# with fewer fields simply lacks the later keys.  Lines that are empty (or
# otherwise false) are skipped.
sub load_task_history
	{
	my ($self) = @_;
	my @keys = qw[id name date doppelganger all month week day hour];
	my $data = $self->read_data('../tmp/nndb/tasks.txt');
	my @tasks;
	foreach my $row (grep { $_ } split "\n", $data)
		{
		my @fields = split "\t", $row;
		my %task;
		# Pair each field with the key at the same position.
		@task{ @keys[0 .. $#fields] } = @fields;
		push @tasks, \%task;
		}
	return \@tasks;
	}
# name_sort
#
# sort() comparator (reads the package variables $a and $b, each a task
# hash).  Orders case-insensitively by last name, then by the whole name.
# Anything from the first comma on (e.g. ", Jr.") is ignored, and the last
# name is whatever follows the final space of what remains.
sub name_sort
	{
	my $name_a = lc $a->{'name'};
	my $name_b = lc $b->{'name'};
	$name_a =~ s/\,.*$//;
	$name_b =~ s/\,.*$//;
	my $last_a = $name_a;
	# Was "$hame_b", an undefined variable that left $last_b always empty.
	my $last_b = $name_b;
	# Drop everything up to and including the final space.
	$last_a =~ s/^.* //;
	$last_b =~ s/^.* //;
	return $last_a cmp $last_b || $name_a cmp $name_b;
	}