#!/usr/bin/perl
# pajek_labelvector.pl
# Gabriel Rossman, UCLA, 2009-09-22
# this file extracts the vertex labels from a .net file and merges them (by sort order) with one or more .vec files
# takes filenames as arguments
# file 1 is .net, files 2-k are .vec
# for each .vec file (e.g. foo.vec) writes out foo.vec.txt as tab-delimited text
# note, this is dependent on an unchanged sort order

use strict; use warnings;
# Require the .net file plus at least one .vec file on the command line.
die "usage: pajek_labelvector.pl <netfile> <vecfile> [<vecfile> ...]\n" unless @ARGV > 1;

# First argument is the .net file; whatever remains in @ARGV are the .vec files.
my $netfile = shift (@ARGV);
my @labels=();

# Read the vertex labels from the .net file, in file order.
# A vertex line looks like:   <number> "<label>" [coordinates/attributes ...]
# Pajek usually pads the vertex number with leading blanks, so leading
# whitespace (and any whitespace between number and label) is accepted.
# Three-arg open with a lexical handle keeps odd filenames (leading '>', '|',
# trailing blanks) from being interpreted as an open mode.
open(my $netin, '<', $netfile) or die "error opening $netfile for reading: $!";
while (my $line = <$netin>) {
	# Only push when the pattern really matched; an unchecked $1 would add
	# an undefined label for any quoted line that is not a vertex line.
	# Lines that do not match (section headers, arcs/edges, blanks) are skipped.
	if ($line =~ /^\s*[0-9]+\s+"(.*)"/) {
		push @labels, $1;
	}
}
close $netin;
# Read each .vec file, pair its values with the labels by position, and write
# the result to "<vecfile>.txt" as tab-delimited "label<TAB>value" lines.
foreach my $vecfile (@ARGV) {
	open(my $vecin, '<', $vecfile) or die "error reading $vecfile: $!";
	my @vec=();
	while (my $line = <$vecin>) {
		$line =~ s/\015?\012\z//; #manual chomp to allow windows or unix text
		next if $line =~ m/^\*/;  #skip header lines such as "*Vertices N"
		next if $line !~ /\S/;    #skip blank lines (e.g. trailing newline at end of file)
		push @vec, $line;
	}
	close $vecin;

	# Validate before touching the output file, so a mismatch never leaves an
	# empty or truncated .txt behind.
	die "error, $vecfile is different length than $netfile (" . scalar(@vec) . " values vs " . scalar(@labels) . " labels)"
		unless @vec == @labels;

	open(my $vecout, '>', "$vecfile.txt") or die "error creating $vecfile.txt: $!";
	for my $i (0..$#vec) {
		print {$vecout} "$labels[$i]\t$vec[$i]\n";
	}
	# Buffered write errors (disk full, etc.) only surface at close.
	close $vecout or die "error writing $vecfile.txt: $!";
}

print "WARNING: this script assumes that the .vec and .net have the same sort order\nplease spot check the values to avoid error\n";