<package>
<job id="G2X">
<?job debug="true"?>
<script language="JScript" src="ANSELentify.js">
// -----------------------------------------------------------------
//~
//~ GEDCOM one-to-one to XML for "Windows Script Host" (WSH)
//~ USAGE: cscript //nologo ged1212xml.wsf [/name:value [...]] [<stdin.ged] [>stdout.xml] [2>stderr.log]
//~ NOTES: double slashes for cscript-arguments, e.g. //nologo, single slashes for wsf-arguments
//~ OPTIONS:
//~   FILES
//~     /ged:<file.ged>
//~         GEDCOM input-filename, default=STDIN
//~     /xml:<file.xml>
//~         XML output-filename, default=STDOUT
//~     /log:<file.log>
//~         Logging output-filename, default=STDERR
//~   GED-INPUT-ENCODING-MODE
//~     /ans:true
//~         start "ANSEL to Entity"-mode before 1st occurrence of +n CHAR ANSEL
//~   XML-OUTPUT
//~     /pfx:<nmtoken>
//~         xml namespace prefix, requires setting of /uri:<URI> too, default=none
//~     /uri:<URI>
//~         xml namespace URI for xmlns[:nsPFX]="...", default=none
//~     /enc:"iso-8859-1"|<encoding>
//~         replace xml declaration's default <?xml ... encoding="iso-8859-1" ?>
//~     /sty:<file.css|file.xsl>
//~         insert processing-instruction <?xml-stylesheet ... href="..."?>, default=none
//~     /root:"GED"|<nmtoken>
//~         replace root-element's default tag-name "GED"
//~     /id:"ID"|<nmtoken>
//~         replace attribute-name's default "ID" for GEDCOM's @<XREF>@s
//~     /ref:"REF"|<nmtoken>
//~         replace attribute-name's default "REF" for GEDCOM's @<XREF>@s
//~     /dtd:""|<file.dtd>
//~         insert doctype-definition <!DOCTYPE ... SYSTEM "...">, default=none
//~     /xsd:""|<file.xsd>
//~         insert root's xsi:XMLSchema-instance-location-definition, default=none
//~     /ifx:""|"id."|"ged:"|<nmtoken>
//~         ID-prefix to create valid xmlID/REF-values, default=none
//~         ID-prefix == string-additive, don't confuse it with namespace-prefixes!
//~ /esc:""|"ESC"|<nmtoken> //~ given name ("ESC" preferred, default=none=noop) //~ moves @#<DATE_CALENDAR_ESCAPE>@s into attributes //~ /sur:"SURN"|"S"|<nmtoken>|<!nmtoken> //~ alter node-name ("S" preferred, default="SURN") for slashed surname-part //~ to avoid double SURN-subnodes in an extended NAME-node/structure //~ a non-nametoken char/string prevents slash-replacement at all //~ // ----------------------------------------------------------------- //~ //~ Copyright (c) 2008 ff. Stefan Unterstein <http://www.unterstein.net/ged1212xml> //~ //~ By operation of rights, permission is hereby granted to copy, distribute and/or //~ modify this program under the terms of the GNU General Public License Version 3 //~ or any later version published by the Free Software Foundation. See the current //~ License at <http://www.gnu.org/licenses/gpl.html> for more details. //~ //~ Such free(d) "copylefted" software is distributed //~ WITHOUT ANY WARRANTY OF OR ABOUT ANYTHING but the "copyleft" itself. //~ // ----------------------------------------------------------------- // ----------------------------------------------------------------- // parse args, echo usage // ----------------------------------------------------------------- var oArgs = WScript.Arguments; var nArgs = WScript.Arguments.Named; // /name:value var uArgs = WScript.Arguments.Unnamed; var Usage = "Usage: cscript \/\/NoLogo [\/\/job:G2X] [absPath]"+WScript.ScriptName+" [\/ged:|\/xml:|\/log:[\"]file.ged|.xml|.log[\"]] [< stdin.ged] [> stdout.xml] [2> stderr.log] \n... 
view source for more options"; if (uArgs.length || nArgs.length>16 || oArgs.length!=nArgs.length) { WScript.Echo(Usage); WScript.Quit(1); } var aName = { "ged":"", "xml":"", "log":"", "pfx":"", "uri":"", "enc":"", "sty":"", "root":"", "id":"", "ref":"", "dtd":"", "xsd":"", "ifx":"", "esc":"", "sur":"", "ans":"" }; if (nArgs.length) // case-insensitive for NAME in /NAME:value pairs if lowercase name-key in aObject - { - for (var idx=0; idx<oArgs.length; idx++) { if (/^\/(\w+?):/.test(oArgs(idx)) && nArgs.Exists(RegExp.$1) && ((RegExp.$1).toLowerCase() in aName)) aName[(RegExp.$1).toLowerCase()]=nArgs.Item(RegExp.$1); } } - /* if (nArgs.length) // name-key case-sensitive for NAME in /NAME:value pairs { for (var idx in aName) { if (nArgs.Exists(idx)) aName[idx]=nArgs.Item(idx); } } */ var FSO = new ActiveXObject("Scripting.FileSystemObject"); - var aFile = { "ged" : ( (aName.ged) ? FSO.OpenTextFile (aName.ged, 1) : WScript.StdIn ), "xml" : ( (aName.xml) ? FSO.CreateTextFile (aName.xml, true) : WScript.StdOut ), "log" : ( (aName.log) ? FSO.CreateTextFile (aName.log, true) : WScript.StdErr ) } aName.ged = aName.ged||"STDIN"; aName.xml = aName.xml||"STDOUT"; aName.log = aName.log||"STDERR"; - if (aName.pfx && !aName.uri) { WScript.StdErr.WriteLine("Error: xmlNamespace \/pfx:Prefix w\/o \/uri:URI\n"+Usage); WScript.Quit(1); } aName.pfx = aName.pfx.replace(/\W/g,""); // ----------------------------------------------------------------- // ----------------------------------------------------------------- var $AP = Array.prototype; // var args = [].slice.call(arguments,0); String.prototype.attr = function(NMTOKEN) { return (this.valueOf())?" 
"+NMTOKEN+"=\""+this+"\"":""; } String.prototype.mark = function(NMTOKEN) { return (this.valueOf())?"<"+NMTOKEN+">"+this+"<\/"+NMTOKEN+">":"<"+NMTOKEN+" \/>"; } String.prototype.markup = function(/*NMTOKENS*/) { return "<"+$AP.join.call(arguments,"><")+">"+this+"<\/"+$AP.slice.call(arguments,0).reverse().join("><\/")+">"; } String.prototype.markUP = function(/*NMTOKENS*/) { return ["<",$AP.join.call(arguments,"><"),">",this,"<\/",$AP.slice.call(arguments,0).reverse().join("><\/").replace(/\s+[^>]*/gi,""),">"].join(""); } String.prototype.entify = function() { return (this.valueOf())?this.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">"):this; } Boolean.prototype.attr = String.prototype.attr; Boolean.prototype.mark = String.prototype.mark; var Report = ['@@report']; - var updateReport = function(s) { if (s instanceof Array) Report=Report.concat(s); else if ((typeof(s)=='string')||(s instanceof String)) Report[Report.length]=s; } - var returnReport = function() { return Report.join('\r\n')+'\r\n@@\r\n'; } // ----------------------------------------------------------------- // ----------------------------------------------------------------- var gedFile = aFile.ged; var gedFileName = aName.ged; var xmlFile = aFile.xml; var xmlFileName = aName.xml; var logFile = aFile.log; var logFileName = aName.log; var xmlEnc = aName.enc||"iso-8859-1"; var xmlStyle = (/\.(css|xsl)$/i.test(aName.sty)) ? "\n<?xml-stylesheet type=\"text\/"+(RegExp.$1).toLowerCase()+"\" href=\""+aName.sty+"\"?>" : "" ; var xmlRoot = aName.root||"GED"; var xmlID = aName.id||"ID"; var xmlIDREF = aName.ref||"REF"; var xmlnsATTR = (aName.uri) ? ((aName.pfx) ? " xmlns:"+aName.pfx+"=\""+aName.uri+"\"" : " xmlns=\""+aName.uri+"\"") : "" ; var xmlnsPFX = (aName.pfx) ? aName.pfx+":" : "" ; var xmlDTD = (aName.dtd) ? "\n<!DOCTYPE "+xmlnsPFX+xmlRoot+" SYSTEM \""+aName.dtd+"\">" : "" ; var xsiXSD = (aName.xsd) ? " xmlns:xsi=\"http:\/\/www.w3.org\/2001\/XMLSchema-instance\""+((aName.uri) ? 
" xsi:schemaLocation=\""+aName.uri+" " : " xsi:noNamespaceSchemaLocation=\"")+aName.xsd+"\"" : "" ; var encANSEL = Boolean(aName.ans)||false; if (encANSEL) { updateReport("GEDCOM input-encoding preset to ANSEL"); } var surNAME = (aName.sur) ? ((/^[a-zA-Z_]\w*$/.test(aName.sur)) ? aName.sur : "") : "SURN" ; var escDATE = aName.esc||""; var idPFX = aName.ifx||""; - var GEDCOM = function(LineStr,LineNum) { this.Line = LineStr; this.LNum = LineNum; this.Valid = /\s*([0-9][0-9]?)\s+([a-zA-Z0-9_@]\S*)(?:$|\s+)(.*)/.test(this.Line); this.Level = (isNaN(RegExp.$1)) ? 0 : parseInt(RegExp.$1,10); this.Token = RegExp.$2; this.Value = (RegExp.$3) ? (RegExp.$3).entify().replace(/@@/g,"@") : false ; this.Value = (this.Value && encANSEL) ? ANSELentify(this.Value) : this.Value ; this.Id = (/^@([a-zA-Z_0-9]\S*)@$/.test(this.Token)) ? idPFX+RegExp.$1 : false ; this.IdRef = (/^@([a-zA-Z_0-9]\S*)@$/.test(this.Value)) ? (this.Value=false,idPFX+RegExp.$1) : false ; this.PI = "" ; return this; } var gedPrev = new GEDCOM("0 NULL", 0); gedPrev.Level = -1; gedPrev.Token = ""; gedPrev.Value = false; gedPrev.Id = false; gedPrev.IdRef = false; gedPrev.PI = ""; var tagStack = []; var tabDepth = [""]; for (var i=1; i<99; i++) tabDepth[i]=tabDepth[i-1]+" "; // var tabDepth = ["","\t","\t\t","\t\t\t","\t\t\t\t","\t\t\t\t\t","\t\t\t\t\t\t","\t\t\t\t\t\t\t","\t\t\t\t\t\t\t\t","\t\t\t\t\t\t\t\t\t"]; - function xmlClosings(gedLevel,gedPrev) { if (gedLevel> gedPrev.Level) { xmlFile.WriteLine((gedPrev.Value)?"":">"); return; } if (gedLevel==gedPrev.Level) { xmlFile.WriteLine((gedPrev.Value)?tagStack[gedPrev.Level]:"/>"+gedPrev.PI); return; } - if (gedLevel <gedPrev.Level) { xmlFile.WriteLine((gedPrev.Value)?tagStack[gedPrev.Level]:"/>"+gedPrev.PI); - for (var lvl=gedPrev.Level-1; lvl>=gedLevel; lvl--) { xmlFile.WriteLine(tabDepth[lvl]+tagStack[lvl]); } } } var mm = { "JAN":"-01-", "FEB":"-02-", "MAR":"-03-", "APR":"-04-", "MAY":"-05-", "JUN":"-06-", "JUL":"-07-", "AUG":"-08-", "SEP":"-09-", 
"OCT":"-10-", "NOV":"-11-", "DEC":"-12-" }; - function gedPIDATE(DATE) { // ISO: YYYY-MM-DD - if (/^([0-3]?[0-9])\s+(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+([0-9]?[0-9]?[0-9]?[0-9])$/.test(DATE)) { return ["<?DATE ", ["0000","000","00","0",""][(RegExp.$3).length], RegExp.$3, mm[RegExp.$2], ["00","0",""][(RegExp.$1).length], RegExp.$1, "?>"].join(""); } - if (/^(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s+([0-9]?[0-9]?[0-9]?[0-9])$/.test(DATE)) { return ["<?DATE ", ["0000","000","00","0",""][(RegExp.$2).length], RegExp.$2, mm[RegExp.$1], "00?>"].join(""); } - if (/^([0-9]?[0-9]?[0-9]?[0-9])$/.test(DATE)) { return ["<?DATE ", ["0000","000","00","0",""][(RegExp.$1).length], RegExp.$1, "-00-00?>"].join(""); } return ""; } // ----------------------------------------------------------------- // ----------------------------------------------------------------- xmlFile.WriteLine("<?xml version=\"1.0\" encoding=\""+xmlEnc+"\"?>"+xmlStyle); xmlFile.WriteLine("<!-- ================================================= -->"); xmlFile.WriteLine("<!-- GEDCOM one-to-one to XML by ged1212xml.wsf (StUs) -->"); xmlFile.WriteLine("<!-- script source at http:\/\/unterstein.net\/ged1212xml -->"); xmlFile.WriteLine("<!-- ================================================= -->"+xmlDTD); xmlFile.Write ("<"+xmlnsPFX+xmlRoot+xmlnsATTR+xsiXSD); gedRead: while (!gedFile.AtEndOfStream) - { var ged$0 = gedFile.ReadLine(); var gedNR = gedFile.Line-1; - if (/^\s*$/.test(ged$0)) { updateReport("empty input-line in "+gedFileName+":"+gedNR+" skipped"); continue gedRead; } var gedThis = new GEDCOM(ged$0,gedNR); - if (gedThis.Valid) { xmlClosings(gedThis.Level,gedPrev); } else { updateReport("invalid input-line in "+gedFileName+":"+gedNR+" skipped"); updateReport("invalid input-line is "+ged$0); continue gedRead; } - if (gedThis.Level>gedPrev.Level+1) { updateReport("invalid level-increment (>1) in "+gedFileName+":"+gedNR+" detected"); } - if (gedThis.Token=="CHAR") { encANSEL = 
(gedThis.Value.toUpperCase()=="ANSEL") ? true : false ; updateReport("input-encoding "+gedThis.Value+" in "+gedFileName+":"+gedNR+" detected"); } if (gedThis.Level==0) xmlFile.WriteLine(); switch (gedThis.Token) - { // ----------------------------------------------------------------- // if token ~ id, value ~ token [+value(s)?] // ----------------------------------------------------------------- case ("@"+String(gedThis.Id).substr(idPFX.length)+"@") : - if (/([a-zA-Z_0-9]+)\s+(\S.*)/.test(gedThis.Value)) { xmlFile.Write(tabDepth[gedThis.Level]+"<"+xmlnsPFX+RegExp.$1+gedThis.Id.attr(xmlID)+">"+RegExp.$2); tagStack[gedThis.Level]="<\/"+xmlnsPFX+RegExp.$1+">"+gedThis.PI; } else { xmlFile.Write(tabDepth[gedThis.Level]+"<"+xmlnsPFX+gedThis.Value+gedThis.Id.attr(xmlID)); tagStack[gedThis.Level]="<\/"+xmlnsPFX+gedThis.Value+">"+gedThis.PI; gedThis.Value=""; } break; // ----------------------------------------------------------------- // enclosing AT-SHARPs mark DATE @#DTOKEN@ date calendar escape; cf. http://homepages.rootsweb.ancestry.com/~pmcbride/gedcom/55gcch2.htm#DATE_CALENDAR_ESCAPE // ----------------------------------------------------------------- case "DATE" : - if (!!escDATE && (/@#(D(?:GREGORIAN|JULIAN|HEBREW|FRENCH R|ROMAN|UNKNOWN))@\s+/.test(gedThis.Value))) { xmlFile.Write(tabDepth[gedThis.Level]+"<"+xmlnsPFX+gedThis.Token+(RegExp.$1).attr(escDATE)+">"+RegExp.rightContext); gedThis.PI=gedPIDATE((RegExp.rightContext).toUpperCase()); } else { xmlFile.Write(tabDepth[gedThis.Level]+"<"+xmlnsPFX+gedThis.Token+((gedThis.Value)?">"+gedThis.Value:"")); gedThis.PI=(gedThis.Value) ? 
gedPIDATE(gedThis.Value.toUpperCase()) : "" ; } tagStack[gedThis.Level]="<\/"+xmlnsPFX+gedThis.Token+">"+gedThis.PI; break; // ----------------------------------------------------------------- // enclosing slashes mark /SURN/ surname // ----------------------------------------------------------------- case "NAME" : if (!!surNAME) gedThis.Value = gedThis.Value.replace(/\/([^\/]+)\//,"<"+xmlnsPFX+surNAME+">$1<\/"+xmlnsPFX+surNAME+">").replace(/\/\//g,"<"+xmlnsPFX+surNAME+"\/>"); // ----------------------------------------------------------------- // // ----------------------------------------------------------------- default : xmlFile.Write(tabDepth[gedThis.Level]+"<"+xmlnsPFX+gedThis.Token+gedThis.IdRef.attr(xmlIDREF)+((gedThis.Value)?">"+gedThis.Value:"")); tagStack[gedThis.Level]="<\/"+xmlnsPFX+gedThis.Token+">"+gedThis.PI; break; } gedPrev = gedThis; // hold'em } xmlClosings(0,gedPrev); xmlFile.Write("\n<\/"+xmlnsPFX+xmlRoot+">"); // ----------------------------------------------------------------- // Report + Close + Quit // ----------------------------------------------------------------- gedFile.Close(); xmlFile.Close(); logFile.WriteLine ( returnReport() ); logFile.Close(); WScript.Quit(0); // ----------------------------------------------------------------- </script> </job> </package>