#!/usr/bin/perl -w -T use strict; $|=1; my ($opt_d, $opt_u, $opt_p, ); my ($opt_m, $opt_y,); my ($opt_s, $opt_D, $opt_v, ); use Getopt::Long qw(:config no_ignore_case bundling); GetOptions( 'd=s' => \$opt_d, # db name 'u=s' => \$opt_u, # db user 'p=s' => \$opt_p, # db pass 'm' => \$opt_m, # mail mode 'y' => \$opt_y, # yaml mode 's=i' => \$opt_s, # data source 'D' => \$opt_D, # dry-run 'v' => \$opt_v, # verbose ); $opt_d ||= 'eve'; $opt_u ||= 'eve'; $opt_s ||= 2 if $opt_m; use EVE::Db; my $dbh = EVE::Db::dbh; die "cant connect to db: ".DBI->errstr unless $dbh; my %CHECKS = (); my %overlay = ( datasource => $opt_s, ); my @head; my $line=0; my $last=""; die "multifile mode currently broken" if @ARGV > 1; my @lines = (); if ($opt_y) { my @y; { local $\; @y = <>; } require YAML; my $y = YAML::Load(join("",@y)); # use Data::Dumper; # print Dumper($y); #':version' => '1.5.0.0', my $v = $y->{':version'}; warn "no version" unless defined $v; warn "bad version '$v'" unless !defined $v || $v =~ /^(1\.5\.0(\.0)?|0\.23|2\.0)$/; my $idlog = "/tmp/em-id.log"; my @ids = (); for my $k (sort keys %$y) { next unless $k =~ /_id$/; my $v = $y->{$k}; $v = "" unless defined $v; push @ids, sprintf("'%s'=>'%s'", $k, $v); } if (@ids) { my $logit = join(" ", @ids)."\n"; open IDL, ">>", $idlog or die "cant open($idlog): $!"; print IDL $logit; close IDL or die "cant close($idlog): $!"; } #':received_at' => '2009-12-07 10:30:51.805502 -00:00', die "dupe datatime" if $overlay{datatime}; my $edt = $y->{":received_at"}; die "no datatime" unless $edt; # TODO: timezones use Date::Parse; my $et = str2time($edt); die "failed to parse edt '$edt'" unless $et; use POSIX qw(strftime); my $dt = strftime "%Y-%m-%d %H:%M:%S", gmtime($et); #die "bad datatime '$dt'" unless $dt =~ /^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d)\d* [-+]00:00$/; die "bad datatime '$dt'" unless $dt =~ /^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)$/; $overlay{datatime} = $1; #':developer_key_id' => '4', #':generated_at' => '2009-12-07 10:31:11', #':user_id' => '2287' #':body' => '52000,10000,16683,32767,1320862101,10000,1,False,2009-10-13 19:16:06,90,60003760,10000002,30000142,0,cache my $b = $y->{':body'}; die "no body" unless $b; $b = $b->{str} if ref $b && ref $b eq 'HASH' && $b->{str}; $b =~ s/[\r\n]+/\n/g; my @b = split "\n", $b; my $f = $b[0]; die "bad first row '$f'" unless $f =~ /^\d+(\.\d+)?,/; my $h = "price,volRemaining,typeID,range,orderID,volEntered,minVolume,bid,issued,duration,stationID,regionID,solarSystemID,jumps,source"; @lines = ($h, @b); } LINE: while ($_ = (<> || shift @lines)) { s/[\r\n]+$//g; print $_."\n" if $opt_v; if ($opt_y) { # forgetting the c in csv, are we? s/,(\d+)([-a-z]+)$/,$1,$2/; } if ($opt_m) { if (/^Date:/) { die "dupe datastamp" if $overlay{datatime}; if (/^Date: \w\w\w, +(\d+) (\w+) (\d+) (\d\d:\d\d:\d\d) \+0000/) { my ($d, $ms, $y, $t,) = ($1,$2,$3,$4,); my $m = { Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6, Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12, }->{$ms}; die "bad month: $ms" unless $m; my $ts = sprintf "%04i-%02i-%02i %s", $y, $m, $d, $t; $overlay{datatime} = $ts; } else { die "bad date format: $_"; } } next if /^(\S+:|\s|From )/; $opt_m = 0 if /^[\r\n]*$/; } next if /^[\s\r\n]*$/; $line++; # s/\s*,\s*/,/g; # strip whitespace sugar # die "tainted line" if /#/; s/"?,"?/#/g; s/(^"|"$)//g; # 1 day,0:00:00 s/(\d+) day(?:s)?[,#](0:00:00)([,#])/$1:0$2$3/g; # i haet EC s/[\s\r\n]+$//; # chompitychomp my @fields = split /#/, $_; if (/^[\w#]+$/) { # is this a (new) header? if (defined $ARGV && $ARGV ne $last) { # filename changed die "multifile mode currently not supported: $last != $ARGV" if $last; $last = $ARGV; if ($last =~ /-(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d)(\d\d)(\d\d)\.txt$/ || $last =~ /(?:^|\/)(\d\d\d\d)-(\d\d)-(\d\d)\.(\d\d):(\d\d):(\d\d)\.[^\/]*$/) { #die "multifile mode currently not supported"; my $ts = sprintf "%04i-%02i-%02i %02i:%02i:%02i", $1, $2, $3, $4, $5, $6; $overlay{datatime} = $ts; $overlay{datasource} ||= 1; } else { die "unknown filename pattern: $last"; } } else { warn "head in line $line" unless $line == 1; } my %trans = ( solarsystemid => "systemid", volremaining => "volremain", volentered => "volenter", # EM2 issued_at => "issued", updated_at => "reportedtime", eve_id => "orderid", sale_volume => "minvolume", available_volume=> "volremain", initial_volume => "volenter", ); @head = map { $a=lc; $trans{$a}||$a } @fields; #printf "HEAD: %s\n", join(" ", @head); next; } die "no head in line $line: '$_'" unless @head; # fuuuuu ... eve-c with optional "source" field if (scalar(@head) == scalar(@fields)+1 && $head[$#head] eq 'source') { push @fields, "evecentral"; } #fieldcount mismatch 15 vs 16 for ' #1868704017,10000002,30000142,60003760,5971,0,14989.0,1,3,3,2011-01-05 23:58:43,1 day,0:00:00,32767,0,2011-01-06 00:01:26.073714, #' at scan-market.pl line 189, <> line 14. unless (scalar(@head) == scalar(@fields)) { die "fieldcount mismatch ".scalar(@head)." vs ".scalar(@fields)." for '$_'"; next; } #die "fieldcount mismatch ".scalar(@head)." vs ".scalar(@fields) unless scalar(@head) == scalar(@fields); #my $expr_date = '(\d\d\d\d-\d\d-\d\d(?: \d\d:\d\d(?::\d\d(?:\.\d+)?)?)?)'; my $expr_date = '(\d\d\d\d-\d\d-\d\d \d\d:\d\d(?::\d\d(?:\.\d+)?)?)'; my %checks = ( issued => $expr_date, bid => "(True|False|[01])", price => '(\d+(?:\.\d+)?)', source => '(evem-(log|cache)|evecentral|evemetrics|cache|log|api)', duration => '(\d+)(?:[:.]00)*', volremain => '(\d+(?:\.\d+)?)', reportedtime => $expr_date, # new eve-m sell => '([tf])', expired => '([tf])', trusted => '([tf])', expires_at => $expr_date, created_at => $expr_date, ); my %row = (); for my $i (0..$#head) { my $k = $head[$i]; my $v = $fields[$i]; die "trying to redefine $k" if exists $row{$k}; # new eve-m $k =~ s/_id$/id/; if ($k eq 'issued') { $v =~ s/^00(\d\d)(-\d\d-)2009 /2009$2$1 /; } my $r = $checks{$k}||'(-?\d+)'; unless ($v =~ /^$r$/i) { warn "SKIP: $_\nSKIP: $k check failed: '$v' !~ /^$r\$/"; next LINE; } $v = $1; if ($k eq 'bid') { $v = { true => 1, false => 0, }->{lc($v)}||$v; } if ($k eq 'sell') { my $V = { t => 0, f => 1, }->{$v}; die "cant remap sell:$v" unless defined $V; $v = $v; $k = "bid"; die "bid and sell" if exists $row{bid}; } $row{$k} = $v; } # SANITY CHECKS die "REPORTEDBY: $_\n" if defined $row{reportedby} && $row{reportedby}>0; my $np = sprintf "%.2f", $row{price}; my $rd = abs($row{price} - $np); die "rounding issue: $rd for $np vs ".$row{price} if $rd > 0.0051; $row{price} = $np; if (exists $row{source} && !$opt_s) { die "source and trusted" if exists $row{trusted}; die "bad sauce: ".$row{source} unless $row{source} =~ /^(evem-)?(log|cache|api)$/; if ($2 eq 'log') { if ($1) { $overlay{datasource} = 10; } else { $overlay{datasource} = 15; } } elsif ($2 eq 'cache') { if ($1) { $overlay{datasource} = 11; } else { $overlay{datasource} = 16; } } elsif ($2 eq 'api') { if ($1) { die "should not happen"; $overlay{datasource} = 12; } else { $overlay{datasource} = 17; } } else { die "internal error"; } } if (exists $row{source} && $opt_m) { die "source and trusted" if exists $row{trusted}; die "bad sauce: ".$row{source} unless $row{source} =~ /^eve(metrics|central)$/; if ($1 eq 'central') { $overlay{datasource} = 20; } elsif ($1 eq 'metrics') { $overlay{datasource} = 21; } else { die "internal error"; } } if (exists $row{trusted} && !$opt_s) { die "trusted and source" if exists $row{source}; if ($row{trusted} eq "f") { $overlay{datasource} = 15; } elsif ($row{trusted} eq "t") { $overlay{datasource} = 16; } else { die "internal error"; } } if (exists $row{expires_at}) { die "expires_at and duration" if exists $row{duration}; die "expires_at and no issued" unless exists $row{issued}; use Date::Parse; my $ets = delete $row{expires_at}; my $et = str2time($ets, "UTC"); die "failed to parse ets '$ets'" unless $et; my $its = $row{issued}; my $it = str2time($its, "UTC"); die "failed to parse its '$its'" unless $it; my $ds = $et - $it; unless ($ds > 0) { warn "reverse time $ds ($its -> $ets) trusted:".(defined $row{trusted} ? $row{trusted} : "n/a"); next; } if ($ds % (60*60*24)) { warn "duration tid:".$row{typeid}." oid:".$row{orderid}." not days $ds ($its -> $ets)"; printf STDERR "ERR: %s\n", $_; next; } my $dd = $ds / (60*60*24); die "funny duration $dd ($its -> $ets)" unless $dd =~ /^(1|3|7|14|30|90|365)$/; $row{duration} = $dd; } if (!($row{orderid}+0)) { warn "orderid 0"; next; } if ($row{duration} == 365 && $row{price} == 100 && $row{volenter} == 100000000) { warn "SISI row for orderid:".$row{orderid}; next; } unless ($row{duration} =~ /^(1|3|7|14|30|90|365)$/) { warn "funny duration:".$row{duration}." for orderid:".$row{orderid}; next; } if ($row{range} =~ /^(32767|65535)$/) { $row{range} = 127; } unless ($row{range} =~ /^(-1|0|1|2|3|4|5|10|20|30|40|127)$/) { warn "funny range:".$row{range}." for orderid:".$row{orderid}; next; } my $tid = $row{typeid}; die "no typeid for $_" unless $tid; unless (exists $CHECKS{$tid}) { my $sth = $dbh->prepare_cached(qq{ SELECT typeName FROM dbo.invTypes WHERE typeID = ? }) or die $dbh->errstr; $sth->execute($tid) or die $sth->errstr; my $tn = $sth->fetchrow_arrayref(); $sth->finish; my $tnn; if ($tn && @$tn) { ($tnn,) = @$tn; } else { warn "no invTypes for typeID $tid"; #next; $tnn = "UNKNOWN"; } die "no typeName for typeID $tid" unless $tnn; $CHECKS{$tid} = $tnn; } die "typeID $tid failed check" unless exists $CHECKS{$tid}; my $staid = $row{stationid}; die "no stationid for $_" unless $staid; my $sysid = $row{systemid}; die "no systemid for $_" unless $sysid; my $regid = $row{regionid}; die "no regionid for $_" unless $regid; my $skey = "$staid $sysid $regid"; unless ($CHECKS{$skey}) { my $sth = $dbh->prepare_cached(qq{ SELECT sta.stationName, sys.solarsystemID, sys.regionID FROM eve.apiStations sta, dbo.mapSolarSystems sys WHERE sta.stationID = ? and sta.solarSystemID = sys.solarSystemID }) or die $dbh->errstr; $sth->execute($staid) or die $sth->errstr; my $sn = $sth->fetchrow_arrayref(); $sth->finish; unless ($sn) { warn "unknown stationid $skey"; next; } die "no stationName for $skey" unless $sn; my ($snn, $ssid, $srid,) = @$sn; die "no stationName for $skey" unless $snn; die "sysid $ssid mismatch for $skey" unless $ssid == $sysid; die "regid $srid mismatch for $skey" unless $srid == $regid; $CHECKS{$skey} = $snn; } die "station $skey failed check" unless $CHECKS{$skey}; my $dtime = $row{reportedtime}||$overlay{datatime}; # my $usth = $dbh->prepare_cached(qq{ # UPDATE # eve.marketRaw # SET # seen = seen + 1, # first = least(first, ?), # last = greatest(last, ?) # WHERE # orderid = ? # and issued = ? # and volremain = ? # and price = ? # }) or die $dbh->errstr; # my $urc = $usth->execute( # $dtime, $dtime, # $row{orderid}, $row{issued}, $row{volremain}, $row{price} # ) or die $usth->errstr; # if (defined $urc && $urc == 0) { # # no update, pass # } elsif (defined $urc && $urc > 0) { # == 1) { ## XXX TODO cleanup # # updated existing row, move on # next; # } else {#if (defined $urc) { # die "unexpected update result '$urc' for orderid:".$row{orderid}; # } my $q = ""; my @q = (); for my $f (qw { orderid stationid typeid bid price minvolume volremain volenter issued duration range }) { die "missing $f in $_" unless defined $row{$f}; $q .= "\t\t\t`$f` = ?,\n"; push @q, $row{$f}; } $q =~ s/^\t\t\t//; $q =~ s/\n$//; my $Q = qq{ INSERT INTO eve.marketRaw SET $q datasource = ?, first = ?, last = ? ON DUPLICATE KEY UPDATE seen = seen + 1, first = least(first, ?), last = greatest(last, ?) }; # printf STDERR "QUERY:\n%s\n", $Q; my $sth = $dbh->prepare_cached($Q) or die $dbh->errstr; push @q, ($overlay{datasource}||0); push @q, ($dtime, $dtime,); push @q, ($dtime, $dtime,); # use Data::Dumper; # print Dumper(\@q); unless ($opt_D) { $sth->execute(@q) or die $sth->errstr; } } exit 0;