Working Notebook
Total Pages: 16
File Type: PDF, Size: 1020 KB
(* ::Title:: aviationAccidentData-03.nb *)
(* Reconstructed from a PDF text extraction of the notebook. The extraction
   dropped auto-sized brackets, flattened typeset fractions and quantity
   templates, and interleaved page headers with the code. Input cells are
   restored below as valid Wolfram Language; recorded outputs are kept as
   comments. Places where the original text was lost and had to be inferred
   are marked NOTE(review). *)

(* ------------------------------------------------------------------ *)
(* Data acquisition                                                    *)
(* ------------------------------------------------------------------ *)

(* One index page per year (1920-2017); collect every hyperlink on each. *)
allLinks =
  Import[StringTemplate["http://www.planecrashinfo.com/`1`/`1`.htm"][#], "Hyperlinks"] & /@
   Range[1920, 2017];

(* Keep only links that look like individual accident pages: <year>/<yy>-<nn>.htm *)
links = Flatten[
   Select[StringMatchQ[
       "http://www.planecrashinfo.com/" ~~ __ ~~ "/" ~~ __ ~~ "-" ~~ __ ~~ ".htm"]] /@ allLinks];

dataDirectory = FileNameJoin[{NotebookDirectory[], "htmlFiles"}]
(* Out: /Users/christopher/Dropbox/mathematica/aviationAccidents/htmlFiles *)

(* The download target must exist before files can be saved into it. *)
If[! DirectoryQ[dataDirectory], CreateDirectory[dataDirectory]];

(* Download every accident page asynchronously, throttled to ~10 requests/s.
   The parentheses are required: without them `&` binds only to Pause[0.1]
   and URLSaveAsynchronous would be evaluated once with a bare `#`. *)
(URLSaveAsynchronous[#, FileNameJoin[{dataDirectory, FileNameTake@#}], Identity @* List];
    Pause[0.1]) & /@ links;

(* Progress monitor: number of downloads still in flight. *)
Dynamic[Length@AsynchronousTasks[], UpdateInterval -> 1]

(* aviationAccidentData-03.nb, page 2 *)

(* Each saved page holds a two-column table "Field:" / value. Take the last
   table in the page, strip the trailing character of each field name
   (StringDrop[#1, -1]), and build one association per accident. *)
rawData =
  Association[Apply[StringDrop[#1, -1] -> #2 &] /@ Last@Import[#, "Data"]] & /@
   FileNames["*htm", dataDirectory];

(* ------------------------------------------------------------------ *)
(* Field parsers                                                       *)
(* ------------------------------------------------------------------ *)

(* parseTime[string] converts a raw time field to a TimeObject.
   "?" gives Missing[]. Otherwise ";" is treated as ":", every character
   that is not a digit or ":" is deleted, a ":" is inserted before the last
   two digits when none is present and more than two characters remain, and
   anything TimeObject cannot interpret gives Missing[]. *)
parseTime["?"] := Missing[]
parseTime[string_] :=
  Block[{t = ToString@string},
   t = StringDelete[StringReplace[t, ";" -> ":"], Except[DigitCharacter | ":"]];
   If[StringFreeQ[t, ":"] && StringLength[t] > 2, t = StringInsert[t, ":", -3]];
   Quiet@Check[TimeObject@t, Missing[]]
  ]

(* parsePeople[string] splits a field of the form
   "<total> (passengers:<p> crew:<c>)" into
   <|"Total" -> _, "Passengers" -> _, "Crew" -> _|>.

   NOTE(review): in the extracted text this definition breaks off right
   after `n_?(StringMatchQ[NumberString]) :>` at a page boundary. The
   right-hand side (ToExpression), the catch-all rule to Missing[], the
   level specification and the StringTrim are reconstructed from how the
   values are used further down (Total, MissingQ, division) - confirm
   against the original notebook.
   A string that does not match the expected shape yields all-Missing
   values instead of an unevaluated First[{}]. *)
parsePeople[string_] :=
  First[
   StringCases[ToString@string,
    StartOfString ~~ t__ ~~ "(passengers:" ~~ p__ ~~ " crew:" ~~ c__ ~~ ")" ~~ EndOfString :>
     Replace[
      StringTrim /@ <|"Total" -> t, "Passengers" -> p, "Crew" -> c|>,
      {n_?(StringMatchQ[NumberString]) :> ToExpression[n], _ -> Missing[]},
      {1}]],
   <|"Total" -> Missing[], "Passengers" -> Missing[], "Crew" -> Missing[]|>]

(* ------------------------------------------------------------------ *)
(* Cleaned dataset                                                     *)
(* ------------------------------------------------------------------ *)

(* Per-column clean-up: "?" becomes Missing[], dates become DateObject,
   times and head-counts go through the parsers above.
   NOTE(review): the key strings "Flight#" and "cn/ ln" are copied verbatim
   from the extracted text, which appears to drop spaces before punctuation
   elsewhere; verify them against Keys[First[rawData]]. *)
accidents = Dataset[rawData][All, {
    "Date" -> (Replace[#, {"?" -> Missing[], _ :> DateObject[#]}] &),
    "Time" -> parseTime,
    "Location" -> Replace["?" -> Missing[]],
    "Operator" -> Replace["?" -> Missing[]],
    "Flight#" -> Replace["?" -> Missing[]],
    "Route" -> Replace["?" -> Missing[]],
    "AC Type" -> Replace["?" -> Missing[]],
    "Registration" -> Replace["?" -> Missing[]],
    "cn/ ln" -> Replace["?" -> Missing[]],
    "Aboard" -> parsePeople,
    "Fatalities" -> parsePeople,
    "Ground" -> Replace["?" -> Missing[]],
    "Summary" -> Replace["?" -> Missing[]]}]
(* Out (first columns only):
   Date          Time      Location
   24 Sep 1916   01:00:00  Billericay, England
   1 Oct 1916    23:45:00  Potters Bar, England
   21 Nov 1916   —         Mainz, Germany
   28 Nov 1916   23:45:00  Off West Hartlepool, England
   4 Mar 1917    —         Near Gent, Belgium
   30 Mar 1917   —         Off Northern Germany
   14 May 1917   05:15:00  Near Texel Island, North Sea
   14 Jun 1917   08:45:00  Off Vlieland Island, North Sea
   17 Jun 1917   —         Near Yarmouth, England
   21 Aug 1917   07:00:00  Off western Denmark
   17 Sep 1908   17:18:00  Fort Myer, Virginia
   20 Oct 1917   07:45:00  Near Luneville, France
   7 Apr 1918    21:30:00  Over the Mediterranean
   10 May 1918   —         Off Helgoland Island, Germany
   11 Aug 1918   10:00:00  Ameland Island, North Sea
   16 Dec 1918   —         Elizabeth, New Jersey
   25 May 1919   —         Cleveland, Ohio
   19 Jul 1919   —         Dix Run, Pennsylvania
   2 Aug 1919    —         Verona, Italy
   2 Oct 1919    —         Newcastle, England
   showing 1–20 of 5746 *)

(* aviationAccidentData-03.nb, page 3 *)

(*DumpSave[FileNameJoin[{NotebookDirectory[],"accidents.mx"}], accidents];*)
Get[FileNameJoin[{NotebookDirectory[], "accidents.mx"}]]

(* ------------------------------------------------------------------ *)
(* Basic Analysis                                                      *)
(* ------------------------------------------------------------------ *)

(* Accidents per year. *)
accidents[DateHistogram[#, "Year"] &, "Date"]
(* Out: histogram; y ticks 20 40 60 80 100; x ticks 1930 1952 1974 1996 2018 *)

(* Same, restricted to 1960 onward. *)
accidents[
 DateHistogram[#, "Year", PlotRange -> {{DateObject[{1960}], All}, All}] &, "Date"]
(* Out: histogram; y ticks 20 40 60 80 100; x ticks 1970 1980 1990 2000 2010 *)

(* aviationAccidentData-03.nb, page 4 *)

(* Aeroflot accidents in two-year bins, 1960 onward. *)
accidents[
 Select[#Operator === "Aeroflot" &] /*
  (DateHistogram[#, Quantity[2, "Years"],
     PlotRange -> {{DateObject[{1960}], All}, All}] &), "Date"]
(* Out: histogram; y ticks 0 5 10 15 20; x ticks 1970 1980 1990 2000 2010 *)

(* Number of accidents per operator, most frequent first. *)
accidents[Counts /* ReverseSort, "Operator"]
(* Out:
   Aeroflot                             260
   Military- U.S. Air Force             177
   Air France                            72
   Deutsche Lufthansa                    64
   United Air Lines                      44
   China National Aviation Corporation   44
   Military- U.S. Army Air Forces        43
   Pan American World Airways            41
   American Airlines                     37
   Military- Royal Air Force             36
   Military- U.S. Navy                   35
   US Aerial Mail Service                35
   Indian Airlines                       34
   KLM Royal Dutch Airlines              34
   Private                               34
   Philippine Air Lines                  33
   Air Taxi                              31
   British Overseas Airways              29
   Military- U.S. Army                   27
   Eastern Air Lines                     25
   showing 1–20 of 2787 *)

(* aviationAccidentData-03.nb, page 5 *)

(* Distribution of accidents-per-operator counts. *)
accidents[Counts /* Histogram, "Operator"]
(* Out: histogram; y ticks 500 1000 1500 2000; x ticks 0 2 4 6 8 10 *)

accidents[Counts /* Values /* FindDistribution, "Operator"]
(* Out: ZipfDistribution[1.55656] *)

(* Total fatalities per operator, largest first. *)
accidents[GroupBy["Operator"], Total, "Fatalities", "Total"][ReverseSort]
(* Out:
   Aeroflot                          9048
   Military- U.S. Air Force          3718
   Air France                        1748
   American Airlines                 1422
   Pan American World Airways        1303
   Military- U.S. Army Air Forces    1070
   United Air Lines                  1019
   AVIANCA                            941
   Turkish Airlines(THY)              891
   Indian Airlines                    861
   China Airlines(Taiwan)             847
   Air India                          827
   Trans World Airlines               784
   Japan Air Lines                    764
   Military- U.S. Navy                752
   Pakistan International Airlines    743
   Korean Airlines                    712
   Eastern Air Lines                  710
   Malaysia Airlines                  671
   KLM Royal Dutch Airlines           635
   showing 1–20 of 2787 *)

(* aviationAccidentData-03.nb, page 6 *)

accidents[GroupBy["Operator"] /* Histogram, Total, "Fatalities", "Total"]
(* Out: histogram; y ticks 0 200 400 600 800 1000 1200 1400; x ticks 0 20 40 60 80 100 *)

accidents[GroupBy["Operator"] /* Values /* FindDistribution, Total, "Fatalities", "Total"]
(* Out: MixtureDistribution[{0.318663, 0.681337},
         {NegativeBinomialDistribution[4, 0.398983], LogSeriesDistribution[0.998502]}] *)

(* aviationAccidentData-03.nb, page 7 *)

(* Per accident: fraction of crew and of passengers who died.
   After Values, Merge receives {fatalities, aboard} for each key, so
   #1 = fatalities and #2 = aboard; rows with unknown values or nobody
   aboard are Missing[]. *)
accidents[All,
 {"Fatalities", "Aboard"} /* Values /*
  Merge[Apply[If[MissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[], #1/#2] &]],
 {"Crew", "Passengers"}]
(* Out (raw extraction of the first 20 rows):
   Crew Passengers —— 1 — —— —— —— —— —— —— —— —— 0 1 —— —— —— —— 1 — 1 — 1 — 1 1 1 —
   showing 1–20 of 5746 *)

(* aviationAccidentData-03.nb, page 8 *)

(* Histogram of the passenger fatality fraction. *)
accidents[All,
  {"Fatalities", "Aboard"} /* Values /*
   Merge[Apply[If[MissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[], #1/#2] &]],
  {"Crew", "Passengers"}][Histogram[#, PlotRange -> {{0, 1}, All}] &, "Passengers"]
(* Out: histogram; y ticks 500 1000 1500 2000 2500; x ticks 0 0.2 0.4 0.6 0.8 1.0 *)

(* Histogram of the crew fatality fraction. *)
accidents[All,
  {"Fatalities", "Aboard"} /* Values /*
   Merge[Apply[If[MissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[], #1/#2] &]],
  {"Crew", "Passengers"}][Histogram[#, PlotRange -> {{0, 1}, All}] &, "Crew"]
(* Out: histogram; y ticks 1000 2000 3000 4000; x ticks 0 0.2 0.4 0.6 0.8 1.0 *)

(* Time-of-day distribution in half-hour bins. *)
accidents[All, "Time"][
 DeleteMissing /* (DateHistogram[#, Quantity[0.5, "Hours"], PlotRange -> All] &)]
(* Out: histogram; y ticks 20 40 60 80 100 120;
        x ticks 13/4:00 13/8:00 13/12:00 13/16:00 13/20:00 14/0:00 *)

(* aviationAccidentData-03.nb, page 9 *)

accidents[Histogram, "Aboard", "Total"]
(* Out: histogram; y ticks 0 200 400 600 800 1000 1200; x ticks 0 20 40 60 80 100 *)

(* Shared by the queries below, which the notebook repeated inline six times.
   Input is {aboard, fatalities}; output is the point
   {aboard, fatalities/aboard}, or Missing[] when either value is unknown.
   As in the notebook, rows with zero fatalities are dropped (#2 === 0).
   The #1 === 0 test is added here so that a row with nobody recorded aboard
   cannot divide by zero.
   NOTE(review): dropping zero-fatality rows biases the averages upward;
   confirm that this is intended. *)
aboardVsFatalityFraction =
  Apply[If[MissingQ[#1] || MissingQ[#2] || #1 === 0 || #2 === 0,
     Missing[], {#1, #2/#1}] &];

(* Scatter plot: people aboard vs. fraction killed. *)
accidents[ListPlot,
 {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction]
(* Out: plot; y ticks 0.2 0.4 0.6 0.8 1.0; x ticks 20 40 60 80 *)

(* aviationAccidentData-03.nb, page 10 *)

(* The same data as a 2D density (bin widths 1 person x 0.1). *)
accidents[
 DensityHistogram[#, {{1}, {0.1}}, PlotRange -> {{0, 100}, {0, 1}},
   AspectRatio -> 1/GoldenRatio] &,
 {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction]
(* Out: density plot; y ticks 0.0 0.2 0.4 0.6 0.8 1.0; x ticks 0 20 40 60 80 100 *)

(* Mean fatality fraction for each number of people aboard. *)
accidents[All,
   {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction][
  DeleteMissing /* GroupBy[First -> Last]][All, Mean][
 Normal /* SparseArray /* ListPlot]
(* Out: plot; y ticks 0.2 0.4 0.6 0.8 1.0; x ticks 100 200 300 400 500 600 *)

(* aviationAccidentData-03.nb, page 11 *)

(* Same, zoomed to 0-200 aboard. *)
accidents[All,
   {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction][
  DeleteMissing /* GroupBy[First -> Last]][All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {{0, 200}, {0, 1}}] &)]
(* Out: plot; y ticks 0.0 0.2 0.4 0.6 0.8 1.0; x ticks 0 50 100 150 200 *)

(* Same, with aboard counts rounded to multiples of 3; the +1 keeps every
   key a valid (positive) SparseArray position. *)
accidents[All,
    {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction][
   DeleteMissing, {1 -> (Round[#, 3] + 1 &)}][GroupBy[First -> Last]][All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {{0, 200}, {0, 1}}] &)]
(* Out: plot; y ticks 0.0 0.2 0.4 0.6 0.8 1.0; x ticks 0 50 100 150 200 *)

(* aviationAccidentData-03.nb, page 12 *)

(* Same, with aboard counts rounded to multiples of 10, full x range. *)
accidents[All,
    {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* aboardVsFatalityFraction][
   DeleteMissing, {1 -> (Round[#, 10] + 1 &)}][GroupBy[First -> Last]][All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {All, {0, 1}}] &)]
(* Out: plot; y ticks 0.2 0.4 0.6 0.8 1.0; x ticks 0 100 200 300 400 500 600 *)

(* Overall fatality fraction: total killed / total aboard. *)
accidents[{Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]] // N
(* Out: 0.727889 *)

accidents[Select[#Operator === "Aeroflot" &] /*
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]] // N
(* Out: 0.781955 *)

accidents[
  Select[#Operator === "United Air Lines" &] /*
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]] // N
(* Out: 0.414228 *)

(* Every distinct operator string that mentions Aeroflot. *)
accidents[Union /* DeleteMissing /* Select[StringContainsQ["Aeroflot"]], "Operator"]
(* Out (raw extraction; entry boundaries are ambiguous):
   Aeroflot Aeroflot/ Aeroflot Aeroflot/ Kazakhstan Aeroflot/Military- Russian
   Aeroflot/ Military- Russian Air Force Aeroflot Russian International Airways
   Aeroflot/ Soviet Air Force *)

(* Substrings used to recognise Russian-built aircraft in "AC Type". *)
russianPlanes = {"Beriev", "Ilyushin", "Voronezh", "Irkut", "BETA", "Irkutsk",
   "IRKUT", "AviaSTEP", "Russian", "Russia", "Mikoyan", "Sokol", "Sukhoi",
   "Komsomolsk", "Novosibirsk", "Tupolev", "Yakovlev", "Aviastar", "Kazan"};

(* aviationAccidentData-03.nb, page 13 *)

(* Per operator: fraction of accidents (with a known aircraft type) that
   involved a Russian-built aircraft; 0 when no type is known. *)
accidents[GroupBy["Operator"] /* ReverseSort,
 If[Length@DeleteMissing@# === 0, 0,
   N[Total[Boole[DeleteMissing[#]]]/Length[DeleteMissing[#]]]] &,
 "AC Type" /*
  Replace[s_String :> StringContainsQ[s, Alternatives @@ russianPlanes, IgnoreCase -> True]]]
(* Out:
   Russian Air Force                 1.
   Sukhoi                            1.
   Red Wings Airlines                1.
   YAK Service                       1.
   Kogalmavia                        1.
   Silk Way Airlines                 1.
   RusAir Airlines                   1.
   Dagestan Airlines                 1.
   Sun Way                           1.
   Russian Ministry of Interior      1.
   Aria Airlines                     1.
   Caspian Airlines                  1.
   Pulkovo Airlines                  1.
   Air West                          1.
   Azov Avia Airlines                1.
   Airline Transport                 1.
   Volga-Avia Express                1.
   AZAL Cargo Company                1.
   Euro Asia Aviation                1.
   Ukranian-Mediterranean Airlines   1.
   showing 1–20 of 2787 *)

(* aviationAccidentData-03.nb, page 14 *)

(* Aircraft types involved in Aeroflot accidents, most frequent first. *)
accidents[Select[#Operator === "Aeroflot" &] /* Counts /* ReverseSort, "AC Type"]
(* Out:
   Yakovlev YAK-40     19
   Antonov AN-24       13
   Ilyushin IL-12      13
   Ilyushin IL-14P     11
   Tupolev TU-134A     10
   Tupolev TU-104B     10
   Ilyushin IL-18B      9
   Tupolev TU-124       8
   Li-2                 8
   Antonov AN-26        6
   Antonov An-24B       6
   Ilyushin IL-14       6
   Tupolev TU-104A      6
   Ilyushin IL-18V      5
   Ilyushin IL-18       5
   Antonov AN-10        4
   Lisunov Li-2         4
   Tupolev TU-154B-2    3
   Tupolev TU-154B      3
   Ilyushin IL-62M      3
   showing 1–20 of 105 *)

(* aviationAccidentData-03.nb, page 15 *)
(* The next input continues past the end of this excerpt; left exactly as extracted. *)
accidents[Select[#Aboard["Total"]