Working Notebook
(* ::Section:: *)
(* Collect the hyperlinks found on each yearly index page, 1920 through 2017. *)
(* The year is substituted twice into the URL template (both `1` slots). *)
allLinks =
  Import[
     StringTemplate["http://www.planecrashinfo.com/`1`/`1`.htm"][#],
     "Hyperlinks"
   ] & /@ Range[1920, 2017];

(* Keep only links of the form <site>/<something>/<something>-<something>.htm, *)
(* then merge the per-year lists into one flat list. *)
links =
  Flatten[
   Select[
      StringMatchQ[
       "http://www.planecrashinfo.com/" ~~ __ ~~ "/" ~~ __ ~~ "-" ~~ __ ~~ ".htm"
      ]
    ] /@ allLinks
  ];

(* Directory that receives the downloaded pages: "htmlFiles" next to the notebook. *)
(* NotebookDirectory[] requires the notebook to have been saved. *)
dataDirectory = FileNameJoin[{NotebookDirectory[], "htmlFiles"}]
(* Output recorded in the notebook: *)
(*   /Users/christopher/Dropbox/mathematica/aviationAccidents/htmlFiles *)

(* Start one asynchronous download per link, saving under the link's file name, *)
(* and pause 0.1 s between submissions. *)
(* NOTE(review): the extracted text had no parentheses around the function body. *)
(* They are required: "&" binds tighter than ";", so without them the "#" slots *)
(* would fall outside the pure function. *)
(
    URLSaveAsynchronous[
     #,
     FileNameJoin[{dataDirectory, FileNameTake@#}],
     Identity@*List
    ];
    Pause[0.1]
  ) & /@ links;

(* Live count of asynchronous tasks still registered, refreshed every second. *)
Dynamic[Length@AsynchronousTasks[], UpdateInterval -> 1]

(* Import every saved page as "Data", take the last element of the result, and *)
(* turn its {key, value} rows into an Association. The last character of each *)
(* key is dropped (presumably a trailing ":" -- TODO confirm against a page). *)
rawData =
  Association[
     Apply[StringDrop[#1, -1] -> #2 &] /@ Last@Import[#, "Data"]
   ] & /@ FileNames["*htm", dataDirectory];

(* parseTime: convert a raw time field into a TimeObject. *)
(*   "?"          -> Missing[] *)
(*   anything else: ";" is treated as ":", every character other than a digit or *)
(*   ":" is removed, and when no ":" remains in a string longer than two *)
(*   characters a ":" is inserted before the last two characters. *)
(*   If TimeObject issues a message for the cleaned string, Missing[] is returned. *)
(* NOTE(review): the brackets and the "|" inside Except were lost in extraction *)
(* and are reconstructed here; Except[DigitCharacter | ":"] is the only reading *)
(* consistent with the visible tokens. *)
parseTime["?"] := Missing[]
parseTime[string_] :=
  Block[{t = ToString@string},
   t = StringDelete[
     StringReplace[t, ";" -> ":"],
     Except[DigitCharacter | ":"]
   ];
   If[StringFreeQ[t, ":"] && StringLength[t] > 2,
    t = StringInsert[t, ":", -3]
   ];
   Quiet@Check[TimeObject@t, Missing[]]
  ]

(* ------------------------------------------------------------------------ *)
(* TRUNCATED IN THIS COPY: parsePeople                                        *)
(* The definition is cut off by a page break right after the rule arrow that *)
(* follows n_?StringMatchQ NumberString, so the replacement applied to numeric *)
(* strings is not visible and is deliberately not guessed at. The visible     *)
(* text is kept verbatim below (brackets were already lost in extraction);    *)
(* restore it from the original notebook aviationAccidentData-03.nb.          *)
(*                                                                            *)
(* parsePeoplestring_:= First@StringCasesstring, StartOfString~~t__~~"(passengers:"~~p__~~" crew:"~~c__~~")"~~EndOfString⧴ Replace<|"Total"→t,"Passengers"→p,"Crew"→c|>,n_?StringMatchQNumberString ⧴ *)
(* ------------------------------------------------------------------------ *)

(* ------------------------------------------------------------------------ *)
(* TRUNCATED IN THIS COPY: accidents                                          *)
(* The Dataset query continues past the end of the visible text (it stops     *)
(* after the "Summary" key). The visible text is kept verbatim below;         *)
(* restore it from the original notebook before evaluating.                   *)
(*                                                                            *)
(* accidents= Dataset[rawData]All, "Date" →Replace#,"?"→Missing[],_⧴DateObject[#]&, "Time"→ parseTime, "Location"→ Replace"?"→Missing[], "Operator"→ Replace"?"→Missing[], "Flight#"→ Replace"?"→Missing[], "Route"→ Replace"?"→Missing[], "AC Type"→ Replace"?"→Missing[], "Registration"→ Replace"?"→Missing[], "cn/ ln"→ Replace"?"→Missing[], "Aboard"→ parsePeople, "Fatalities"→ parsePeople, "Ground"→ Replace"?"→Missing[], "Summary"→ *)
(* ------------------------------------------------------------------------ *)
[Show full text]