File perl-Finance-Quote-57401-financecanada.patch of Package perl-Finance-Quote

https://rt.cpan.org/Public/Bug/Display.html?id=57401

Date: Tue May 11 20:56:07 2010 paul.polak [...] gmail.com - Ticket created [Reply] [Forward]
Subject: FinanceCanada.pm module

The FinanceCanada module was broken since canada.finance.com changed its
website for financial information. The new module now uses
www.financialpost.com, and can retrieve quotes for both stocks and
mutual funds. The module can retrieve information from multiple
exchanges (TOR, NYSE, etc.) and because symbols are not unique
among different exchanges, it is worthwhile to check the stock symbol /
ID on the financialpost.com site before using the module to ensure
correct information is retrieved. Attached is the updated F::Q module
and a testing module.

Index: finance-quote/lib/Finance/Quote/FinanceCanada.pm
===================================================================
--- finance-quote.orig/lib/Finance/Quote/FinanceCanada.pm
+++ finance-quote/lib/Finance/Quote/FinanceCanada.pm
@@ -7,21 +7,24 @@
 # Version 0.2 Rewrite by David Hampton <hampton@employees.org> for
 # changed web site.
 #
+# Version 0.3 Rewrite by linux_slacker <paul.polak[AT]gmail[DOT]com> for
+# changed web site
 
 package Finance::Quote::FinanceCanada;
 require 5.004;
 
 use strict;
 
-use vars qw/ $VERSION / ;
-
 use LWP::UserAgent;
 use HTTP::Request::Common;
-use HTML::TableExtract;
+use HTML::TokeParser::Simple;
+
 
-$VERSION = '1.17';
-my $FINANCECANADA_MAINURL = ("http://finance.canada.com/");
-my $FINANCECANADA_URL = "http://stockgroup.canada.com/sn_overview.asp?symbol=T.";
+my $VERSION = '0.3';
+my $FINANCECANADA_MAINURL = ("http://www.financialpost.com/");
+my $FINANCECANADA_STOCKSYM_URL = "http://idms.financialpost.com/stocks/company_overview.idms?SYMBOL=";
+my $FINANCECANADA_STOCKID_URL = "http://idms.financialpost.com/stocks/company_overview.idms?ID_NOTATION=";
+my $FINANCECANADA_FUND_URL = "http://idms.financialpost.com/funds/snapshot.idms?FUND_KEY=";
 
 sub methods {
     return (canada => \&financecanada,
@@ -33,8 +36,7 @@ sub labels {
     my @labels = qw/method source name symbol currency last date isodate nav price/;
     return (canada => \@labels,
             financecanada => \@labels);
-}   
-
+}
 
 
 sub financecanada {
@@ -47,122 +49,235 @@ sub financecanada {
     my $ua = $quoter->user_agent;
 
     foreach my $symbol (@symbols) {
-	my ($day_high, $day_low, $year_high, $year_low);
+        $info{$symbol, "success"} = 0;
+        $info{$symbol, "symbol"} = $symbol;
+        $info{$symbol, "method"} = "financecanada";
+        $info{$symbol, "source"} = $FINANCECANADA_MAINURL;
+
+        my @sites;
+
+        # Figure out which URLs we should use
+        if ($symbol =~ /^[A-Za-z]/) {
+            @sites = ($FINANCECANADA_STOCKSYM_URL);
+        }
+        else {
+            @sites = ($symbol =~ /^\d{5}$/)
+                ? ($FINANCECANADA_FUND_URL, $FINANCECANADA_STOCKID_URL)
+                : ($FINANCECANADA_STOCKID_URL, $FINANCECANADA_FUND_URL);
+        }
 
-	$info{$symbol, "success"} = 0;
-	$info{$symbol, "symbol"} = $symbol;
-	$info{$symbol, "method"} = "financecanada";
-	$info{$symbol, "source"} = $FINANCECANADA_MAINURL;
-
-	# Pull the data from the web site
-        my $url = $FINANCECANADA_URL.$symbol;
-        # print $url;
-        my $response = $ua->request(GET $url);
-        # print $response->content;
-	if (!$response->is_success) {
-            $info{$symbol, "errormsg"} = "Error contacting URL";
-            next;
-        }
-
-	# Parse the page looking for the table containing the full
-	# name of the stock
-        my $te = new HTML::TableExtract( depth => 2, count => 0);
-        $te->parse($response->content);
-
-	# debug
-#	foreach my $ts ($te->table_states) {
-#	    print "\n***\n*** Table (", join(',', $ts->coords), "):\n***\n";
-#	    foreach my $row ($ts->rows) {
-#		print join(',', @$row), "\n";
-#	    }
-#	}
-
-        foreach my $ts ($te->table_states) {
-            my $row = $ts->row(0);
-	    $info{$symbol, "name"} = $row->[0]
-		if ($row->[0] =~ s/^.([\w\s]+).*/$1/);
-	}
-	if (!defined($info{$symbol, "name"})) {
-            $info{$symbol, "errormsg"} = "Invalid symbol";
-	    next;
-	}
+        foreach my $root (@sites) {
+            my $url = $root.$symbol;
 
-	# Parse the page looking for the table containing the quote
-	# details
-        $te = new HTML::TableExtract(headers => [qw(Quote)],
-				     slice_columns => 0);
-        $te->parse($response->content);
-
-	# debug
-#	foreach my $ts ($te->table_states) {
-#	    print "\n***\n*** Table (", join(',', $ts->coords), "):\n***\n";
-#	    foreach my $row ($ts->rows) {
-#		print join(',', @$row), "\n";
-#	    }
-#	}
-
-	# Now parse the quote details.  This method of parsing is
-	# independent of which row contains which data item, so if the
-	# web site reorders these it won't impact this code.
-        foreach my $ts ($te->table_states) {
-            foreach my $row ($ts->rows) {
-
-		# Remove leading and trailing white space
-		$row->[0] =~ s/^\s*(.+?)\s*$/$1/ if defined($row->[0]);
-		$row->[1] =~ s/^\s*(.+?)\s*$/$1/ if defined($row->[1]);
-
-		# Map the row into our data array
-		for ($row->[0]) {
-		    /^Last Traded/ && do { s/Last Traded: (.*) ../$1/;
-					   $quoter->store_date(\%info, $symbol, { usdate => $_}); };
-		    /^Last$/	&& do { $info{$symbol, "last"} = $row->[1];
-					$info{$symbol, "price"} = $row->[1];
-					$info{$symbol, "nav"} = $row->[1];
-					last; };
-		    /^Open$/	&& do { $info{$symbol, "open"} = $row->[1]; last; };
-		    /^Bid$/	&& do { $info{$symbol, "bid"} = $row->[1]; last; };
-		    /^Ask$/	&& do { $info{$symbol, "ask"} = $row->[1]; last; };
-		    /^% Change/ && do { $info{$symbol, "p_change"} = $row->[1];
-					$info{$symbol, "p_change"} =~ s/%//;
-					last; };
-		    /^Volume/	&& do { $info{$symbol, "volume"} = $row->[1]; last; };
-		    /^Close/	&& do { $info{$symbol, "close"} = $row->[1]; last; };
-
-		    /^Day High$/  && do { $info{$symbol, "high"} = $row->[1]; last; };
-		    /^Day Low$/	  && do { $info{$symbol, "low"} = $row->[1]; last; };
-		    /^Year High$/ && do { $year_high = $row->[1]; last; };
-		    /^Year  Low$/ && do { $year_low = $row->[1]; last; };
-
-		    $info{$symbol, "success"} = 1;
-		};
-	    }
-	}
+            my $response = $ua->request(GET $url);
+
+            if (!$response->is_success) {
+                $info{$symbol, "errormsg"} = "Error contacting URL";
+                next;
+            }
 
-	if ($info{$symbol, "success"} == 1) {
-	    $info{$symbol, "currency"} = "CAD";
-	    foreach (keys %info) {
-		$info{$_} =~ s/\$//;
-	    }
-	    $info{$symbol, "day_range"} = $info{$symbol, "low"} . " - " . $info{$symbol, "high"}
-	    if (defined($info{$symbol, "high"}) && defined($info{$symbol, "low"}));
-	    
-	    if (defined($year_high) && defined($year_low)) {
-		$info{$symbol, "year_range"} = "$year_low - $year_high";
-	    }
-	} else {
-            $info{$symbol, "errormsg"} = "Cannot parse quote data";
+            my $parser = HTML::TokeParser::Simple->new(string => $response->content);
+            my %ret = ($root eq $FINANCECANADA_FUND_URL)
+                ? ParseFund(\$parser) : ParseStock(\$parser);
+
+            for my $key (keys %ret) {
+                $info{$symbol, $key} = $ret{$key};
+            }
+
+            last if ($info{$symbol, "success"});
+        }
+
+		if ($info{$symbol, "success"} == 1) {
+
+		    if (!defined($info{$symbol, "currency"})) {
+		        $info{$symbol, "currency"} = "CAD";
+		    }
+
+		    # Use current day at GMT time, since no date given with quote
+		    my ($day, $month, $year) = getGMTDate();
+		    $quoter->store_date(\%info, $symbol,
+		        {month => $month, day => $day, year => $year});
+
+            $info{$symbol, "timezone"} = "GMT";
+
+		    foreach (keys %info) {
+		        $info{$_} =~ s/\$//;
+		    }
+
+		    if (defined($info{$symbol, "high"}) && defined($info{$symbol, "low"})) {
+		        $info{$symbol, "day_range"} = $info{$symbol, "low"} . " - " . $info{$symbol, "high"};
+		    }
+
+		    if (defined($info{$symbol, "year_high"}) && defined($info{$symbol, "year_low"})) {
+		        $info{$symbol, "year_range"} = $info{$symbol, "year_low"}." - ".$info{$symbol, "year_high"};
+		    }
+		}
+		else {
+		    $info{$symbol, "errormsg"} = "Cannot parse quote data";
+		}
 	}
+
+	return wantarray() ? %info : \%info;
+}
+
+sub ParseStock($) {
+    my $ref = shift;
+    my $parser = $$ref;
+
+    my %info;
+
+    while (my $div = $parser->get_tag('div')) {
+        my $id = $div->get_attr('id');
+
+        if ($id eq "IDMScontainer") {
+            my $header = $parser->get_tag('h2');
+            my $name = trim($parser->get_trimmed_text('/h2'));
+
+            $name =~ s/&nbsp;//g;
+
+            # Should have at least 1 alphabetic character...
+            last unless ($name =~ /[A-Za-z]/);
+
+            $info{"name"} = $name;
+        }
+        elsif ($id eq "fundProfile") {
+            for (my $i=0; $i<6; $i++) {
+                my $span = $parser->get_tag('span');
+                my $class = $span->get_attr('class');
+
+                if ($class eq "price") {
+                    my $price = removedollar($parser->get_trimmed_text('/span'));
+
+                    last unless ($price =~ /^\d/);
+
+                    $info{"price"} = $price;
+                    $info{"success"} = 1;
+                }
+                elsif (($class eq "positive") || ($class eq "negative")) {
+                    my $raw = $parser->get_trimmed_text('/span');
+                    $raw =~ /^(\-)?\$((\d)+(\.\d\d))/;
+                    $info{"net"} = $1.$2;
+                }
+                elsif ($class eq "high") {
+                    $info{"high"} = removedollar($parser->get_trimmed_text('/span'));
+                }
+                elsif ($class eq "low") {
+                    $info{"low"} = removedollar($parser->get_trimmed_text('/span'));
+                }
+                elsif ($class eq "volume") {
+                    $info{"volume"} = $parser->get_trimmed_text('/span');
+                }
+            }
+        }
+        elsif ($id eq "quoteDetail") {
+            for (my $i=0; $i<18; $i++) {
+                my $th = $parser->get_tag('th');
+
+                if ($i == 3) {
+                    $info{"year_high"} = removedollar($parser->get_trimmed_text('th'));
+                }
+                elsif ($i == 5) {
+                    $info{"cap"} = removedollar($parser->get_trimmed_text('th'));
+                }
+                elsif ($i == 9) {
+                    $info{"year_low"} = removedollar($parser->get_trimmed_text('th'));
+                }
+                elsif ($i == 15) {
+                    $info{"exchange"} = $parser->get_trimmed_text('th');
+                }
+            }
+        }
+    }
+
+    return %info;
+}
+
+sub ParseFund($) {
+    my $ref = shift;
+    my $parser = $$ref;
+
+    my %info;
+
+    my $idmscount = 0;
+
+    while (my $div = $parser->get_tag('div')) {
+        my $id = $div->get_attr('id');
+
+        if ($id eq "IDMScontainer") {
+            $idmscount++;
+
+            if ($idmscount == 1) {
+                my $header = $parser->get_tag('h2');
+                my $name = trim($parser->get_trimmed_text('/h2'));
+
+                $name =~ s/&nbsp;//g;
+
+                # Should have at least 1 alphabetic character...
+                last unless ($name =~ /[A-Za-z]/);
+
+                $info{"name"} = $name;
+            }
+            else {
+                while (my $tr = $parser->get_tag('tr')) {
+                    my $tdcount = 0;
+
+                    while (my $td = $parser->get_tag('td')) {
+                        $tdcount++;
+
+                        if ($tdcount == 1) {
+                            my $price = removedollar($parser->get_trimmed_text('/td'));
+
+                            last unless ($price =~ /^\d/);
+
+                            $info{"nav"} = $price;
+                            $info{"success"} = 1;
+                        }
+                        elsif ($tdcount == 2) {
+                            $info{"net"} =
+                                removedollar($parser->get_trimmed_text('/span'));
+                        }
+                        elsif ($tdcount == 7) {
+                            $info{"currency"} =
+                                $parser->get_trimmed_text('/td');
+                        }
+                    }
+                }
+            }
+        }
     }
 
-    return wantarray() ? %info : \%info;
+    return %info;
+}
+
+sub getGMTDate() {
+    my @timeData = gmtime(time);
+    my $day = $timeData[3];
+    my $month = 1 + $timeData[4];
+    my $year = 1900 + $timeData[5];
+
+    return ($day, $month, $year);
+}
+
+sub trim($) {
+    my $string = shift;
+    $string =~ s/^\s+//;
+    $string =~ s/\s+$//;
+    return $string;
+}
+
+sub removedollar($) {
+    my $raw = shift;
+    $raw =~ /^\$?(.*)$/;
+    return $1;
 }
 
+
 1;
 
 =head1 NAME
 
-Finance::Quote::FinanceCanada - Obtain stock and mutual fund prices from
-finance.canada.com
+Finance::Quote::FinanceCanada - Obtain stock and fund prices
+from www.financialpost.com
 
 =head1 SYNOPSIS
 
@@ -171,25 +286,30 @@ finance.canada.com
     $q = Finance::Quote->new;
 
     # Can failover to other methods
-    %quotes = $q->fetch("canada", "stock_fund-code");
-    
+    %quotes = $q->fetch("canada", "stock_code");
+
     # Use this module only
-    %quotes = $q->fetch("financecanada", "stock_fund-code");
+    %quotes = $q->fetch("financecanada", "stock_code");
 
 =head1 DESCRIPTION
 
-This module obtains information about Canadian Stock and Mutual Funds from
-finanace.canada.com.  The information source "canada" can be used if the
+This module obtains information about Canadian stocks and funds from
+www.financialpost.com.  The information source "canada" can be used if the
 information source is unimportant, or "financecanada" to specifically use
-finance.canada.com.
+www.financialpost.com.
 
-=head1 STOCK_FUND-CODE
+=head1 STOCK_CODE
 
-Canadian stocks/mutual funds do not have a unique symbol identifier.  This
-module uses the symbols as used on finance.canada.com.  The simplest way
-to fetch the ID for a particular stock/fund is to go to finance.canada.com,
-search for your particular stock or mutual fund, and note the symbol ID.
-This is helpfully provided by the site in their returned HTML quote.
+Canadian stocks/mutual funds do not have a unique symbol identifier on
+www.financialpost.com.  For example, the symbol "T" can refer to stock
+quotes from either "Telus" on the Toronto Stock Exchange (TOR) or
+"AT&T Inc." on NYSE.  The simplest way to fetch the ID for a particular
+stock/fund is to go to www.financialpost.com, search for your
+particular stock/fund, and note the symbol 'id' in the site URL.
+
+Note that www.financialpost.com uses different URLs for stocks and funds.
+This module attempts to guess which URL to use based regular expressions tests
+on the symbol.
 
 =head1 LABELS RETURNED
 
@@ -199,9 +319,8 @@ method source name symbol currency date
 
 =head1 SEE ALSO
 
-Finance Canada.com website - http://finance.canada.com/
+Finance Canada.com website - http://www.financialpost.com/
 
 Finance::Quote
 
 =cut
-
Index: finance-quote/t/financecanada.t
===================================================================
--- finance-quote.orig/t/financecanada.t
+++ finance-quote/t/financecanada.t
@@ -1,36 +1,42 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl
+
 use strict;
-use Test::More;
-use Finance::Quote;
+use Test;
+use Data::Dumper;
 
-if (not $ENV{ONLINE_TEST}) {
-    plan skip_all => 'Set $ENV{ONLINE_TEST} to run this test';
-}
+BEGIN {plan tests => 22};
+
+use Finance::Quote;
 
-plan tests => 26;
+# Test FinanceCanada functions
 
-# Test Finance Canada functions.
+my $q = Finance::Quote->new();
+my @stocks = ("RY", "T", "15213", "283808");
 
-my $q      = Finance::Quote->new();
-my @stocks = ("NT","XIU","UUU", "PCA");
-my $year = (localtime())[5] + 1900;
-my $lastyear = $year - 1;
+my %regexps = (
+    RY  => qr/\bROYAL\b/,
+    T   => qr/\bAT\&T\b/,
+    15213   => qr/\bTD Canadian Index\b/,
+    283808  => qr/\bSPRINT\b/,
+);
 
 my %quotes = $q->fetch("financecanada", @stocks);
 ok(%quotes);
 
-# Check that the name and nav are defined for all of the stocks.
 foreach my $stock (@stocks) {
-	ok($quotes{$stock,"price"} > 0);
-	ok(length($quotes{$stock,"name"}));
-	ok($quotes{$stock,"success"});
-        ok($quotes{$stock, "currency"} eq "CAD");
-	ok(substr($quotes{$stock,"isodate"},0,4) == $year ||
-	   substr($quotes{$stock,"isodate"},0,4) == $lastyear);
-	ok(substr($quotes{$stock,"date"},6,4) == $year ||
-	   substr($quotes{$stock,"date"},6,4) == $lastyear);
+	my $name = $quotes{$stock, "name"};
+	print "#Testing $stock: $name\n";
+
+	my $regexp = $regexps{$stock};
+	ok($name =~ /$regexp/i);
+
+	ok($quotes{$stock, "method"} eq 'financecanada');
+
+	ok(($quotes{$stock, "price"} > 0) || ($quotes{$stock, "nav"} > 0));
+	ok($quotes{$stock, "net"} =~ /^-?\d+\.\d+$/);
+	ok($quotes{$stock, "success"});
 }
 
 # Check that a bogus stock returns no-success.
 %quotes = $q->fetch("financecanada", "BOGUS");
-ok(! $quotes{"BOGUS","success"});
+ok(! $quotes{"BOGUS", "success"});