[PATCH v3 2/6] geoip: adapt to GeoLite2 database

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Philip Prindeville <philipp@xxxxxxxxxxxxxxxxxxxxx>

Requires Net::CIDR::Lite for manipulating CIDR blocks (aggregation, etc.),
since the database is stored as subnet/mask pairs and may require compaction
into ranges (which can combine adjacent subnets).

We don't use Net::CIDR because it's a clunkier interface.

Signed-off-by: Philip Prindeville <philipp@xxxxxxxxxxxxxxxxxxxxx>
---
 geoip/xt_geoip_build | 255 ++++++++++++++++++++++++++++++++++---------
 geoip/xt_geoip_dl    |  11 +-
 2 files changed, 206 insertions(+), 60 deletions(-)

diff --git a/geoip/xt_geoip_build b/geoip/xt_geoip_build
index 871c94befbd4f5d8ac9450ad8366c020b7abbd03..f71d3f6b1455e930cb973ea25828a7f1badb3800 100755
--- a/geoip/xt_geoip_build
+++ b/geoip/xt_geoip_build
@@ -1,10 +1,13 @@
 #!/usr/bin/perl
 #
 #	Converter for MaxMind CSV database to binary, for xt_geoip
-#	Copyright © Jan Engelhardt, 2008-2011
+#	Copyright Jan Engelhardt, 2008-2011
+#	Copyright Philip Prindeville, 2018
 #
 use Getopt::Long;
-use IO::Handle;
+use Net::CIDR::Lite;
+use Socket qw(AF_INET AF_INET6 inet_pton);
+use warnings;
 use Text::CSV_XS; # or trade for Text::CSV
 use strict;
 
@@ -25,33 +28,186 @@ if (!-d $target_dir) {
 	exit 1;
 }
 
+my %countryId;		# geoname_id => ISO country (or continent) code; filled by loadCountries()
+my %countryName;	# country/continent code => display name; filled by loadCountries()
+
+my $dir = findVersion();	# lexically last GeoLite2-Country-CSV_<8 digits> directory in .
+
+&loadCountries();
+
 &dump(&collect());
 
-sub collect
+sub findVersion	# return the name of the most recent database directory in ., or die
 {
-	my %country;
-
-	while (my $row = $csv->getline(*ARGV)) {
-		if (!defined($country{$row->[4]})) {
-			$country{$row->[4]} = {
-				name => $row->[5],
-				pool_v4 => [],
-				pool_v6 => [],
-			};
+	my @dirs = ();	# every entry of . named GeoLite2-Country-CSV_<8 digits>
+
+	opendir(my $dh, '.') || die "Can't open .: $!\n";
+
+	while (readdir $dh) {
+		if ($_ =~ m/^GeoLite2-Country-CSV_\d{8}$/) {
+			push(@dirs, $_);
 		}
-		my $c = $country{$row->[4]};
-		if ($row->[0] =~ /:/) {
-			push(@{$c->{pool_v6}},
-			     [&ip6_pack($row->[0]), &ip6_pack($row->[1])]);
+	}
+	closedir $dh;
+
+	@dirs = sort @dirs;	# names differ only in the digit suffix, so the last one sorts highest
+	return pop(@dirs) // die "No GeoLite2-Country-CSV_* directory found in .\n";
+}
+
+sub loadCountries	# populate %countryId and %countryName from the English Locations CSV
+{
+	my $file = "$dir/GeoLite2-Country-Locations-en.csv";
+
+	%countryId = ();
+	%countryName = ();
+
+	open(my $fh, '<', $file) || die "Couldn't open list country names\n";
+
+	# first line is headers; %header maps each column name to its index
+	my $row = $csv->getline($fh);
+
+	my %header = map { ($row->[$_], $_); } (0..$#{$row});
+
+	my %pairs = (	# required column name => label used in the error message below
+		country_iso_code => 'ISO Country Code',
+		geoname_id => 'ID',
+		country_name => 'Country Name',
+		continent_code => 'Continent Code',
+		continent_name => 'Continent Name',
+	);
+
+	# verify that the columns we need are present
+	map { die "Table has no $pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
+
+	my $id = $header{geoname_id};	# from here on these hold column indexes, not values
+	my $cc = $header{country_iso_code};
+	my $long = $header{country_name};
+	my $ct = $header{continent_code};
+	my $cn = $header{continent_name};
+
+	while (my $row = $csv->getline($fh)) {
+		if ($row->[$cc] eq '' && $row->[$long] eq '') {	# no country on this row: key it by continent
+			$countryId{$row->[$id]} = $row->[$ct];
+			$countryName{$row->[$ct]} = $row->[$cn];
 		} else {
-			push(@{$c->{pool_v4}}, [$row->[2], $row->[3]]);
+			$countryId{$row->[$id]} = $row->[$cc];
+			$countryName{$row->[$cc]} = $row->[$long];
 		}
+	}
+
+	$countryName{A1} = 'Anonymous Proxy';	# A1, A2 and O1 are the pseudo-codes lookupCountry() returns
+	$countryName{A2} = 'Satellite Provider';
+	$countryName{O1} = 'Other Country';
+
+	close($fh);
+}
+
+sub lookupCountry	# resolve one Blocks-CSV row's ids and flags to a country key
+{
+	my ($id, $rid, $proxy, $sat) = @_;
+
+	# the proxy/satellite flags win over any geoname id
+	return 'A1' if ($proxy);
+	return 'A2' if ($sat);
+
+	# no location id: fall back to the registered country's id
+	$id = $rid unless ($id);
+	return 'O1' if ($id eq '');
+
+	exists $countryId{$id}
+		or die "Unknown id: $id line $.\n";
+	return $countryId{$id};
+}
+
+sub collect	# read both Blocks CSVs; returns a ref to { code => { name, pool_v4, pool_v6 } }
+{
+	my ($file, $fh, $net, $id, $rid, $proxy, $sat, $row);
+	my (%country, %header);
+
+	my %pairs = (	# required column name => label used in the "Table has no ..." error
+		network => 'Network',
+		registered_country_geoname_id => 'Registered Country ID',
+		geoname_id => 'Country ID',
+		is_anonymous_proxy => 'Anonymous Proxy',
+		is_satellite_provider => 'Satellite',
+	);
+
+	foreach (sort keys %countryName) {	# one empty v4 and v6 pool per known code
+		$country{$_} = {
+			name => $countryName{$_},
+			pool_v4 => Net::CIDR::Lite->new(),
+			pool_v6 => Net::CIDR::Lite->new(),
+		};
+	}
+
+	$file = "$dir/GeoLite2-Country-Blocks-IPv4.csv";
+
+	open($fh, '<', $file) || die "Can't open IPv4 database\n";
+
+	# first line is headers
+	$row = $csv->getline($fh);
+
+	%header = map { ($row->[$_], $_); } (0..$#{$row});
+
+	# verify that the columns we need are present ($pairs{...} is the readable label)
+	map { die "Table has no $pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
+
+	$net = $header{network};	# from here on these hold column indexes, not values
+	$id = $header{geoname_id};
+	$rid = $header{registered_country_geoname_id};
+	$proxy = $header{is_anonymous_proxy};
+	$sat = $header{is_satellite_provider};
+
+	while ($row = $csv->getline($fh)) {
+		my ($cc, $cidr);
+
+		$cc = lookupCountry($row->[$id], $row->[$rid], $row->[$proxy], $row->[$sat]);
+		$cidr = $row->[$net];
+		$country{$cc}->{pool_v4}->add($cidr);
+
+		if ($. % 4096 == 0) {	# progress report on STDERR every 4096 input lines
+			print STDERR "\r\e[2K$. entries";
+		}
+	}
+
+	print STDERR "\r\e[2K$. entries total\n";
+
+	close($fh);
+
+	$file = "$dir/GeoLite2-Country-Blocks-IPv6.csv";	# same procedure again for IPv6
+
+	open($fh, '<', $file) || die "Can't open IPv6 database\n";
+
+	# first line is headers
+	$row = $csv->getline($fh);
+
+	%header = map { ($row->[$_], $_); } (0..$#{$row});
+
+	# verify that the columns we need are present ($pairs{...} is the readable label)
+	map { die "Table has no $pairs{$_} column\n" unless (exists $header{$_}); } keys %pairs;
+
+	$net = $header{network};
+	$id = $header{geoname_id};
+	$rid = $header{registered_country_geoname_id};
+	$proxy = $header{is_anonymous_proxy};
+	$sat = $header{is_satellite_provider};
+
+	while ($row = $csv->getline($fh)) {
+		my ($cc, $cidr);
+
+		$cc = lookupCountry($row->[$id], $row->[$rid], $row->[$proxy], $row->[$sat]);
+		$cidr = $row->[$net];
+		$country{$cc}->{pool_v6}->add($cidr);
+
 		if ($. % 4096 == 0) {
 			print STDERR "\r\e[2K$. entries";
 		}
 	}
 
 	print STDERR "\r\e[2K$. entries total\n";
+
+	close($fh);
+
 	return \%country;
 }
 
@@ -59,7 +215,7 @@ sub dump
 {
 	my $country = shift @_;
 
-	foreach my $iso_code (sort keys %$country) {
+	foreach my $iso_code (sort keys %{$country}) {
 		&dump_one($iso_code, $country->{$iso_code});
 	}
 }
@@ -67,50 +223,41 @@ sub dump
 sub dump_one
 {
 	my($iso_code, $country) = @_;
-	my($file, $fh);
+	my @ranges;	# result of list_range(); handed on to writeCountry()
 
-	printf "%5u IPv6 ranges for %s %s\n",
-		scalar(@{$country->{pool_v6}}),
-		$iso_code, $country->{name};
+	@ranges = $country->{pool_v4}->list_range();	# IPv4 pool first
 
-	$file = "$target_dir/".uc($iso_code).".iv6";
-	if (!open($fh, "> $file")) {
-		print STDERR "Error opening $file: $!\n";
-		exit 1;
-	}
-	foreach my $range (@{$country->{pool_v6}}) {
-		print $fh $range->[0], $range->[1];
-	}
-	close $fh;
+	writeCountry($iso_code, $country->{name}, AF_INET, @ranges);
 
-	printf "%5u IPv4 ranges for %s %s\n",
-		scalar(@{$country->{pool_v4}}),
-		$iso_code, $country->{name};
+	@ranges = $country->{pool_v6}->list_range();	# then the IPv6 pool
 
-	$file = "$target_dir/".uc($iso_code).".iv4";
-	if (!open($fh, "> $file")) {
+	writeCountry($iso_code, $country->{name}, AF_INET6, @ranges);
+}
+
+sub writeCountry	# print a summary line and write one country's ranges for one address family
+{
+	my ($iso_code, $name, $family, @ranges) = @_;	# callers pass AF_INET or AF_INET6 as $family
+	my $fh;
+
+	printf "%5u IPv%s ranges for %s %s\n",
+		scalar(@ranges),
+		($family == AF_INET ? '4' : '6'),
+		$iso_code, $name;
+
+	my $file = "$target_dir/".uc($iso_code).".iv".($family == AF_INET ? '4' : '6');
+	if (!open($fh, '>', $file)) {	# $file is <ISO>.iv4 or <ISO>.iv6 under $target_dir
 		print STDERR "Error opening $file: $!\n";
 		exit 1;
 	}
-	foreach my $range (@{$country->{pool_v4}}) {
-		print $fh pack("NN", $range->[0], $range->[1]);
-	}
-	close $fh;
-}
 
-sub ip6_pack
-{
-	my $addr = shift @_;
-	$addr =~ s{::}{:!:};
-	my @addr = split(/:/, $addr);
-	my @e = (0) x 8;
-	foreach (@addr) {
-		if ($_ eq "!") {
-			$_ = join(':', @e[0..(8-scalar(@addr))]);
-		}
+	binmode($fh);	# output is packed binary addresses, not text
+
+	foreach my $range (@ranges) {
+		my ($start, $end) = split('-', $range);	# each range is a "start-end" string
+		$start = inet_pton($family, $start);
+		$end = inet_pton($family, $end);
+		print $fh $start, $end;	# start address immediately followed by end address
 	}
-	@addr = split(/:/, join(':', @addr));
-	$_ = hex($_) foreach @addr;
-	return pack("n*", @addr);
+	close $fh;
 }
 
diff --git a/geoip/xt_geoip_dl b/geoip/xt_geoip_dl
index 50d3806ebe50a21fc102823868a91472d1dbde71..1de60442a8040f55d775d134d7a8ea707582d71e 100755
--- a/geoip/xt_geoip_dl
+++ b/geoip/xt_geoip_dl
@@ -1,8 +1,7 @@
 #!/bin/sh
 
-rm -f GeoIPv6.csv GeoIPv6.csv.gz GeoIPCountryCSV.zip GeoIPCountryWhois.csv;
-wget \
-	http://geolite.maxmind.com/download/geoip/database/GeoIPv6.csv.gz \
-	http://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip;
-gzip -d GeoIPv6.csv.gz;
-unzip GeoIPCountryCSV.zip;
+rm -rf GeoLite2-Country-CSV_*	# remove previously unpacked database directories
+
+wget -q http://geolite.maxmind.com/download/geoip/database/GeoLite2-Country-CSV.zip
+unzip -q GeoLite2-Country-CSV.zip	# presumably yields GeoLite2-Country-CSV_YYYYMMDD/, which xt_geoip_build searches for
+rm -f GeoLite2-Country-CSV.zip	# the archive is not needed once unpacked
-- 
2.17.1




[Index of Archives]     [Linux Netfilter Development]     [Linux Kernel Networking Development]     [Netem]     [Berkeley Packet Filter]     [Linux Kernel Development]     [Advanced Routing & Traffic Control]     [Bugtraq]

  Powered by Linux