#!/usr/bin/python -OO
# -*- coding: utf-8 -*-
# (c) 2008 Antti Palosaari <crope@iki.fi>
# v1.1

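# Usage:
#   1. download the channel/frequency pdf from
#      http://www.digitv.fi/sivu.asp?path=1;8224;9519
#   2. convert it to text:
#      pdftotext -raw 2008_Kanavat\ ja\ taajuudet.pdf
#   3. run this script with the resulting text file as its first argument;
#      the generated files are written to ./data/ (which must already exist)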

import os
import sys
import re
import struct
import string
import codecs

# Finland uses UHF channels 21-69
ch_min = 21
ch_max = 69

# Comment header written at the top of every generated file.
_HEADER = (
    "# automatically generated from http://www.digitv.fi/sivu.asp?path=1;8224;9519\n"
    "# T freq bw fec_hi fec_lo mod transmission-mode guard-interval hierarchy\n"
)

# A usable line is a transmitter name followed by at least one
# "<1-2 digit channel> <3 digit frequency>" pair.
_STATION_LINE = re.compile(r'^[\w\s,.ÄäÖöÅå\*\-]*[\d]{1,2}[\s]{1}[\d]{3}.*$')

# Scandinavian letters and the ASCII letter each one is replaced with.
_SCANDIC_TO_ASCII = (
    ('Ä', "A"),
    ('ä', "a"),
    ('Ö', "O"),
    ('ö', "o"),
    ('Å', "A"),
    ('å', "a"),
)


def _open_input(path):
    """Open the pdftotext output for reading as the interpreter's native str.

    Python 2 hands back raw bytes, which the byte-string patterns above match
    directly.  Python 3 has to decode; UTF-8 is used because that is the
    encoding this source file declares for its own scandic literals.
    Undecodable bytes are replaced rather than aborting the whole run.
    """
    if sys.version_info[0] >= 3:
        return open(path, "r", encoding="utf-8", errors="replace")
    return open(path, "r")


def _parse_line(line):
    """Convert one input line into ``(file name, file contents)``.

    Returns None when the line does not look like a transmitter line.
    Prints an "ERROR: ..." message (but still continues) for every channel
    number outside ch_min..ch_max.  Raises ValueError if a field in a channel
    position is not an integer.

    NOTE(review): matching of non-scandic, non-ASCII letters differs between
    Python 2 (byte matching) and Python 3 (Unicode matching).
    """
    if not _STATION_LINE.match(line):
        return None

    # replace scandic characters
    for scandic, plain in _SCANDIC_TO_ASCII:
        line = line.replace(scandic, plain)

    # remove all unspecified chars from beginning of line
    line = re.sub(r'^[^a-zA-ZÄäÖöÅå]*', "", line)

    line = line.replace(",", "")  # remove ','
    # A space followed by a non-digit belongs to the name: replace it with
    # '_' so that only the separators in front of numbers remain spaces.
    line = re.sub(r'[\ ](?=[\D])', '_', line)

    elem = [field.strip() for field in ("fi-" + line).split(' ')]

    # elem[0] is the file name; after it the fields alternate
    # channel number (odd index), frequency in MHz (even index).
    data = _HEADER
    for i, field in enumerate(elem[1:], 1):
        if i % 2:  # ch number
            if int(field) < ch_min or int(field) > ch_max:
                print("ERROR: chan number not valid " + str(field))
        else:  # freq
            data += "T " + str(field) + "000000 8MHz 2/3 NONE QAM64 8k 1/8 NONE\n"
    return elem[0], data


def _write_scan_file(name, data):
    """Write *data* to ./data/<name>, closing the file even on error."""
    # The file is opened in binary mode (as before) so line endings are never
    # translated; on Python 3 the text therefore has to be encoded first.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    with open("./data/" + name, "wb") as fwrite:
        fwrite.write(data)


def main(argv=None):
    """Generate one scan file per transmitter line of the input text file.

    argv defaults to sys.argv; argv[1] is the path of the text produced by
    ``pdftotext -raw``.  Lines that are not transmitter lines are reported on
    stdout as "SKIPPED: ...".  Output goes to the existing ./data/ directory,
    one file per transmitter, each opened and closed individually.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: <script> <text file produced by pdftotext -raw>")

    with _open_input(argv[1]) as fread:
        for line in fread:
            parsed = _parse_line(line)
            if parsed is None:
                print("SKIPPED: " + line)
                continue
            name, data = parsed
            _write_scan_file(name, data)


if __name__ == "__main__":
    main()
