Oops, forgot the scripts;-)

Pjotr Kourzanov wrote:
> Dear all,
>
> Having been dissatisfied with performance and features of xmltv2vdr
> (too slow, no credits/category information in epg.data) I have created
> a new script (based on AWK), that can be found in the attachment.
>
> If you want to try it, put your channels.conf.xmltv in one folder
> with these two scripts, and then run cat *.xml | ./xmltv2epg > epg.
>
> My mileage: 54MB XML -> 18MB EPG in 44 minutes (xmltv2vdr used to
> take 9 hours).
>
> Pjotr
>
>
> _______________________________________________
> vdr mailing list
> vdr@xxxxxxxxxxx
> http://www.linuxtv.org/cgi-bin/mailman/listinfo/vdr
>
-------------- next part --------------
#!/bin/sh
#
# xmltv2epg - wrapper around xmltv2epg.awk.
#
# Reads XMLTV on stdin, puts every XML tag on a line of its own, drops
# blank lines, feeds the result to xmltv2epg.awk and recodes the output
# from $in_charset to $charset.  Writes the EPG records to stdout.

# external dependencies
date="/bin/date"
sed="/bin/sed"
grep="/bin/grep"
# kept single-quoted on purpose: the charsets are expanded by the final eval
recode='/usr/bin/recode -f "$in_charset..$charset"'

# input data
dir=`dirname "$0"`
channels="$dir/channels.conf.xmltv"

# miscellaneous
# surround every <...> tag with newlines (\n in the replacement needs GNU sed)
htmlnorm='s,<[^<>]*>,\n&\n,g'
in_tz="Europe/Amsterdam"
export in_charset=utf8
export charset=utf8
export LC_ALL=ru_RU.UTF-8
export TZ="Europe/Amsterdam"

xmltv2epg_awk="$dir/xmltv2epg.awk"

# [[:space:]] instead of [ \t\r\n]: inside a grep bracket expression the
# backslash escapes are literal characters, so the old pattern removed lines
# consisting only of the letters t, r, n and kept tab-only lines.
"$sed" -e "$htmlnorm" |
    "$grep" -v '^[[:space:]]*$' |
    "$xmltv2epg_awk" -vchannels="$channels" -vbindate="$date" |
    eval "$recode"
-------------- next part --------------
#!/usr/bin/awk -f
#
# xmltv2epg.awk - turn normalised XMLTV (one tag or one text chunk per line)
# into EPG records: "C"/"c" open/close a channel, "E"/"e" open/close an
# event, "T" title, "S" category, "D" description.
#
# Variables expected via -v:
#   channels  path of the channels.conf.xmltv file
#   bindate   path of a date(1) that understands -d (GNU date)
#
# Requires GNU awk (gensub, three-argument match, delete of a whole array).

BEGIN {
    stderr = "/dev/stderr"
    load_channels(channels)
    # separators used when several text chunks end up in the same data[] slot
    separator["desc"] = "|"
    separator["display-name"] = separator["title"] = separator["category"] = " / "
    separator["adapter"] = separator["presenter"] = separator["director"] = separator["actor"] = ", "
}

# A rule that deals with a tag itself sets handled=1 so that the generic
# tag/text rules further down leave the line alone.  The flag has to be
# cleared for every record; without this the generic rules would stop
# firing after the first <channel> line.
{ handled = 0 }

# Not marked as handled: the generic rule below still registers "rating"
# as an active tag so that its text is collected.
/^<rating/ {
    rating_system = attr("system")
}

/^<subtitles/ {
    handled = 1
    data["subtitles"] = attr("type")
}

/^<channel/ {
    handled = 1
    id = attr("id")
    data["display-name"] = ""
}

/^<\/channel>/ {
    handled = 1
    names[id] = data["display-name"]
    if (!channel_src[id]) {
        print "NO SRC for", id > stderr
        next
    }
    # events[id] doubles as "channel is wanted" flag and next event number
    events[id] = 1
}

/^<programme/ {
    handled = 1
    in_event = 0
    id = attr("channel")
    if (!events[id])
        next

    # Only validated timestamps are ever put on the shell command line.
    start = timestamp("start")
    if (start == "") {
        print "BAD start time, programme skipped:", $0 > stderr
        next
    }
    stop = timestamp("stop")
    if (stop == "")
        stop = start

    # one shell, two date(1) calls: epoch seconds of start, then of stop
    cmd = sprintf("%s -d '%s' +'%%s';"\
                  "%s -d '%s' +'%%s'",
                  bindate, start,
                  bindate, stop)
    ndates = 0
    # "> 0" matters: getline returns -1 on error, which a bare while()
    # would treat as true forever
    while ((cmd | getline line) > 0)
        dates[++ndates] = line
    close(cmd)
    if (ndates < 2) {
        print "date conversion failed, programme skipped:", $0 > stderr
        next
    }
    # no usable stop time: assume the programme lasts three hours
    if (dates[2] == dates[1])
        dates[2] = dates[1] + 60*60*3

    # first event of this channel: close the previous channel, open this one
    if (events[id] == 1) {
        if (in_channel)
            print "c"
        print "C", channel_src[id], names[id]
        in_channel = 1
    }

    print "E", events[id], dates[1], dates[2] - dates[1]
    in_event = 1
    delete data
    delete curtag
}

/^<\/programme/ {
    handled = 1
    # nothing to close when the opening tag was skipped
    if (!in_event)
        next
    in_event = 0
    events[id]++

    if (data["title"])
        print "T", data["title"]
    if (data["category"])
        print "S", data["category"]

    oOFS = OFS; OFS = "|"
    ext = ""
    for (t in curtag)
        ext = append(ext, t)
    print "D "\
        (data["sub-title"] ? data["sub-title"] : "")\
        (data["episode-num"] ? (data["sub-title"] ? ", " : "") "part " gensub(/ . (.*) . /,"\\1","g",data["episode-num"]) : "")\
        (data["country"] ? data["country"] " " : "")\
        (data["date"] ? data["date"] " " : "")\
        (data["premiere"] ? "(premiere in " data["premiere"] ")" : ""),
        (data["desc"] ? data["desc"] : ""),"|",
        (ext ? ext " ": "")\
        (data["audio"] ? data["audio"] " ": "")\
        (data["video"] ? data["video"] " ": "")\
        (data["colour"]=="no" ? " BW " : "")\
        (data["subtitles"] ? data["subtitles"] " ": ""),
        (data["adapter"] ? "|Adapter: " data["adapter"] : "")\
        (data["presenter"] ? "|Presenter: " data["presenter"] : "")\
        (data["director"] ? "|Director: " data["director"] : "")\
        (data["actor"] ? "|Actor(s): " data["actor"] : "")\
        (data["writer"] ? "|Writer(s): " data["writer"] : ""),
        (data["rating"] ? "|rating " data["rating"] (rating_system ? "(" rating_system ")" : "") : "")\
        (data["star-rating"] ? "|" data["star-rating"] : "")
    OFS = oOFS
    print "e"
}

# self-closing tag: remembered as a boolean flag of the current programme
!handled && /^<[^ \t\r\n]+.*\/>/ {
    tag = gensub(/^<([^ \t\r\n]+).*>/, "\\1", "g")
    # "<new/>" would otherwise be recorded as "new/"
    sub(/\/$/, "", tag)
    curtag[tag] = 1
    next
}

# opening or closing tag: maintain the set of currently open tags
!handled && /^<[^ \t\r\n]+.*>/ {
    tag = gensub(/^<([^ \t\r\n]+).*>/, "\\1", "g")
    if (tag ~ /^\//)
        delete active[substr(tag, 2)]
    else
        active[tag] = 1
    next
}

# text line: append it to the slot of every tag that is currently open
!handled {
    text = txtconv($0)
    for (t in active)
        data[t] = append(data[t], text, separator[t])
}

END {
    # close the last channel, if one was ever opened
    if (in_channel)
        print "c"
}

# attr(name) - value of attribute `name` on the current line, or "" when
# the attribute is absent.  The leading blank keeps e.g. "id" from matching
# inside a longer attribute name.
function attr(name,    m) {
    if (match($0, "[ \t]" name "=\"([^\"]*)\"", m))
        return m[1]
    return ""
}

# timestamp(name) - convert the attribute `name` of the current line from
# "YYYYMMDDhhmm[ss][ +ZZZZ|-ZZZZ]" to "YYYY-MM-DD hh:mm:ss[ +ZZZZ]" as
# understood by date -d.  Returns "" when the attribute is missing or does
# not have that shape, so the result contains only digits, blanks and the
# characters "-", ":" and "+" and is safe inside single quotes.
function timestamp(name,    re, m) {
    re = "[ \t]" name "=\""\
         "([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])"\
         "([0-9][0-9])([0-9][0-9])([0-9][0-9])?"\
         "( [+-][0-9]+)?\""
    if (!match($0, re, m))
        return ""
    return m[1] "-" m[2] "-" m[3] " " m[4] ":" m[5] ":" (m[6] == "" ? "00" : m[6]) m[7]
}

# load_channels(channels) - read the colon-separated channels file and fill
#   channel_src[xmltv id] -> "source-nid-tid-sid" built from fields 4/11/12/10
#   channel_ch[that id]   -> the XMLTV ids mapped to it
# Field 14 carries the XMLTV ids: ";"-separated groups of ","-separated ids,
# optionally prefixed with "vsetv=".  Lines starting with ":" are skipped.
# Every parameter after `channels` is a local variable.
function load_channels(channels,    a, i, n, chsrc, b, c, j, m,
                                    oFS, rc, src, spid, npid, tid, esrc) {
    oFS = FS
    FS = ":"
    # "> 0": a missing/unreadable file makes getline return -1, which a
    # bare while() would loop on forever
    while ((rc = (getline < channels)) > 0) {
        if ($0 ~ /^:/)
            continue
        src = $4; spid = $10; npid = $11; tid = $12; esrc = $14
        chsrc = src "-" npid "-" tid "-" spid
        n = split(esrc, a, ";")
        for (i = 1; i <= n; i++) {
            if (a[i] ~ /^vsetv=/) {
                split(a[i], b, "=")
                m = split(b[2], c, ",")
                for (j = 1; j <= m; j++) {
                    print "vsetv:", chsrc, c[j] > stderr
                    channel_src[c[j]] = append(channel_src[c[j]], chsrc)
                    channel_ch[chsrc] = append(channel_ch[chsrc], c[j])
                }
            } else {
                m = split(a[i], c, ",")
                for (j = 1; j <= m; j++) {
                    print "xmltv:", chsrc, c[j] > stderr
                    # plain ids map to exactly one channel: last one wins
                    channel_src[c[j]] = chsrc
                    channel_ch[chsrc] = append(channel_ch[chsrc], c[j])
                }
            }
        }
    }
    if (rc < 0)
        print "cannot read channels file:", channels > stderr
    close(channels)
    FS = oFS
}

# append(a, b, c) - a and b joined by c (a single blank when c is empty);
# just b when a is empty.
function append(a, b, c) {
    return a (a ? (c ? c : " ") : "") b
}

# txtconv(a) - decode the XML character/entity references found in text.
# NOTE(review): the entity spellings were reconstructed; the archived copy
# of this function had them already decoded (which left a syntax error and
# no-op replacements) - confirm against the original attachment.
function txtconv(a) {
    gsub(/&quot;/, "\"", a)
    gsub(/&apos;/, "'", a)
    gsub(/&lt;/, "<", a)
    gsub(/&gt;/, ">", a)
    gsub(/&#58;/, ":", a)
    # NOTE(review): "?" is what the archived script substitutes for
    # o-umlaut; possibly a charset artefact of the archive - confirm
    gsub(/&(#246|ouml);/, "?", a)
    gsub(/&(#8217|rsquo);/, "'", a)
    # last, so that "&amp;lt;" becomes "&lt;" and is not decoded twice;
    # "\\&" is a literal ampersand in a gsub replacement
    gsub(/&amp;/, "\\&", a)
    return a
}