(* Collect every hyperlink found on each yearly index page, 1920 through 2017. *)
allLinks = Table[
   Import[StringTemplate["http://www.planecrashinfo.com/`1`/`1`.htm"][year], "Hyperlinks"],
   {year, 1920, 2017}];

(* Keep only the links shaped like <site>/<dir>/<a>-<b>.htm and flatten the
   per-year lists into one list of URLs. *)
links = Flatten[
   Map[
    Select[
     StringMatchQ[
      "http://www.planecrashinfo.com/" ~~ __ ~~ "/" ~~ __ ~~ "-" ~~ __ ~~ ".htm"]],
    allLinks]];
(* Folder next to this notebook that holds the downloaded HTML pages. *)
dataDirectory = FileNameJoin[{NotebookDirectory[], "htmlFiles"}]
(* Out: /Users/christopher/Dropbox/mathematica/aviationAccidents/htmlFiles *)
(* Download every page in `links` into dataDirectory, one asynchronous task per URL.
   The parentheses around the compound expression are required: `;` binds more
   loosely than `&`, so without them only Pause[0.1] would be mapped over links
   and the download would run once with an unbound #. *)
If[! DirectoryQ[dataDirectory], CreateDirectory[dataDirectory]];
Scan[
  (URLSaveAsynchronous[#, FileNameJoin[{dataDirectory, FileNameTake@#}], Identity@*List];
    (* brief pause between submissions *)
    Pause[0.1]) &,
  links];
(* Live display, refreshed every second, of the length of AsynchronousTasks[];
   presumably used to watch the downloads started above drain to zero. *)
Dynamic[Length@AsynchronousTasks[], UpdateInterval→1]
(* Turn each saved HTML file into an Association of label -> value.
   Import[..., "Data"] yields nested lists; the last element is taken as the table
   of {label, value} rows. StringDrop[#1, -1] removes the final character of each
   label (presumably a trailing ":" -- confirm against the pages). *)
rawData = Function[file,
    Association[
     Apply[StringDrop[#1, -1] -> #2 &] /@ Last@Import[file, "Data"]]
    ] /@ FileNames["*htm", dataDirectory];
(* parseTime[s] converts a raw time field into a TimeObject, or Missing[] when the
   field is "?" or cannot be interpreted.
   Steps: stringify the input, treat ";" as a mistyped ":", strip every character
   that is neither a digit nor ":", insert a ":" before the last two digits when
   none is present (e.g. "1745" -> "17:45"), then hand the text to TimeObject. *)
parseTime["?"] := Missing[]
parseTime[string_] := Module[{t = ToString@string},
  t = StringDelete[StringReplace[t, ";" -> ":"], Except[DigitCharacter | ":"]];
  If[StringFreeQ[t, ":"] && StringLength[t] > 2,
   t = StringInsert[t, ":", -3]];
  (* Any message issued by TimeObject is treated as "unparseable". *)
  Quiet@Check[TimeObject@t, Missing[]]]
(* parsePeople[s] splits a field of the form
     "<total> (passengers:<p> crew:<c>)"
   into <|"Total" -> ..., "Passengers" -> ..., "Crew" -> ...|>.
   NOTE(review): the end of this definition was cut off in the source. The value
   conversion below (numeric strings -> numbers, anything else such as "?" ->
   Missing[]) is reconstructed from how the result is used later (the values are
   summed and tested with MissingQ) -- confirm against the original notebook. *)
parsePeople[string_String] := First[
  StringCases[string,
   StartOfString ~~ t__ ~~ "(passengers:" ~~ p__ ~~ " crew:" ~~ c__ ~~ ")" ~~ EndOfString :>
    Replace[
     (* Trim so that padding around the captured numbers does not defeat the match. *)
     StringTrim /@ <|"Total" -> t, "Passengers" -> p, "Crew" -> c|>,
     {
      (* ToExpression is only reached for strings that are pure number literals. *)
      n_String?(StringMatchQ[NumberString]) :> ToExpression[n],
      _ -> Missing[]},
     {1}]],
  (* No match at all: report every component as missing instead of failing in First. *)
  <|"Total" -> Missing[], "Passengers" -> Missing[], "Crew" -> Missing[]|>]
(* Non-string input (for example an absent field) is reported as all-missing. *)
parsePeople[_] := <|"Total" -> Missing[], "Passengers" -> Missing[], "Crew" -> Missing[]|>
(* Build the main Dataset from the scraped records and clean each column:
   "?" becomes Missing[], dates become DateObject, times go through parseTime and
   the two head-count fields go through parsePeople. "Ground" is only cleaned of
   "?" and otherwise left as imported. *)
accidents = With[{unknownToMissing = Replace["?" -> Missing[]]},
  Dataset[rawData][All, {
    (* Parentheses are needed: "->" binds tighter than "&". *)
    "Date" -> (Replace[#, {"?" -> Missing[], _ :> DateObject[#]}] &),
    "Time" -> parseTime,
    "Location" -> unknownToMissing,
    "Operator" -> unknownToMissing,
    "Flight#" -> unknownToMissing,
    "Route" -> unknownToMissing,
    "AC Type" -> unknownToMissing,
    "Registration" -> unknownToMissing,
    "cn/ ln" -> unknownToMissing,
    "Aboard" -> parsePeople,
    "Fatalities" -> parsePeople,
    "Ground" -> unknownToMissing,
    "Summary" -> unknownToMissing}]]
△ Date Time Location
24 Sep 1916 01:00:00 Billericay, England 1 Oct 1916 23:45:00 Potters Bar, England 21 Nov 1916 — Mainz, Germany 28 Nov 1916 23:45:00 Off West Hartlepool, England 4 Mar 1917 — Near Gent, Belgium 30 Mar 1917 — Off Northern Germany 14 May 1917 05:15:00 Near Texel Island, North Sea 14 Jun 1917 08:45:00 Off Vlieland Island, North Sea 17 Jun 1917 — Near Yarmouth, England 21 Aug 1917 07:00:00 Off western Denmark 17 Sep 1908 17:18:00 Fort Myer, Virginia 20 Oct 1917 07:45:00 Near Luneville, France 7 Apr 1918 21:30:00 Over the Mediterranean 10 May 1918 — Off Helgoland Island, Germany 11 Aug 1918 10:00:00 Ameland Island, North Sea 16 Dec 1918 — Elizabeth, New Jersey 25 May 1919 — Cleveland, Ohio 19 Jul 1919 — Dix Run, Pennsylvania 2 Aug 1919 — Verona, Italy ▽ 2 Oct 1919 — Newcastle, England
showing 1–20 of 5746 aviationAccidentData-03.nb ���3
(* Cache of the parsed dataset. The DumpSave line is left commented out; when
   enabled it writes the definition of `accidents` to accidents.mx next to the
   notebook. The Get line loads that file back. *)
(*DumpSave[FileNameJoin[{NotebookDirectory[],"accidents.mx"}], accidents];*)
Get[FileNameJoin[{NotebookDirectory[], "accidents.mx"}]]
Basic Analysis
(* Histogram of accident dates with one bin per year. *)
accidents[All, "Date"][Function[dates, DateHistogram[dates, "Year"]]]
100
80
60
40
20
1930 1952 1974 1996 2018
(* Same yearly histogram, with the horizontal axis starting at 1960. *)
accidents[All, "Date"][
 Function[dates,
  DateHistogram[dates, "Year",
   PlotRange -> {{DateObject[{1960}], All}, All}]]]
100
80
60
40
20
1970 1980 1990 2000 2010 4 ��� aviationAccidentData-03.nb
(* Aeroflot accidents in two-year bins, horizontal axis starting at 1960.
   The plotting function is parenthesised because "&" binds more loosely than
   "/*"; without the parentheses the whole composition would become one
   pure function. *)
accidents[
 Select[#Operator === "Aeroflot" &] /*
  (DateHistogram[#, Quantity[2, "Years"],
     PlotRange -> {{DateObject[{1960}], All}, All}] &),
 "Date"]
20
15
10
5
0 1970 1980 1990 2000 2010
(* Number of accidents per operator, largest first. *)
accidents[Counts /* ReverseSort, "Operator"]
(* Out (first row; the table continues below): △ Aeroflot 260 *)
Military- U.S. Air Force 177 Air France 72 Deutsche Lufthansa 64 United Air Lines 44 China National Aviation Corporation 44
Military- U.S. Army Air Forces 43 Pan American World Airways 41 American Airlines 37
Military- Royal Air Force 36 Military- U.S. Navy 35 US Aerial Mail Service 35 Indian Airlines 34 KLM Royal Dutch Airlines 34 Private 34 Philippine Air Lines 33 Air Taxi 31 British Overseas Airways 29
Military- U.S. Army 27 Eastern Air Lines 25
showing 1–20 of 2787 ▽ aviationAccidentData-03.nb ���5
(* Histogram of the per-operator accident counts. *)
accidents[All, "Operator"][Counts /* Histogram]
2000
1500
1000
500
(* Horizontal axis ticks of the preceding histogram: 0 2 4 6 8 10 *)

(* Fit a distribution to the per-operator accident counts. *)
accidents[Counts /* Values /* FindDistribution, "Operator"]
(* Out: ZipfDistribution[1.55656] *)

(* Total fatalities per operator, largest first. *)
accidents[GroupBy["Operator"], Total, "Fatalities", "Total"][ReverseSort]
(* Out (first row; the table continues below): △ Aeroflot 9048 *)
Military- U.S. Air Force 3718 Air France 1748 American Airlines 1422 Pan American World Airways 1303
Military- U.S. Army Air Forces 1070 United Air Lines 1019 AVIANCA 941
Turkish Airlines(THY) 891 Indian Airlines 861
China Airlines(Taiwan) 847 Air India 827 Trans World Airlines 784 Japan Air Lines 764
Military- U.S. Navy 752 Pakistan International Airlines 743 Korean Airlines 712 Eastern Air Lines 710 Malaysia Airlines 671 KLM Royal Dutch Airlines 635
showing 1–20 of 2787 ▽ 6 ��� aviationAccidentData-03.nb
(* Histogram of total fatalities per operator. *)
accidents[GroupBy["Operator"], Total, "Fatalities", "Total"][Histogram]
1400
1200
1000
800
600
400
200
0 0 20 40 60 80 100
(* Fit a distribution to the per-operator fatality totals. *)
accidents[GroupBy["Operator"] /* Values /* FindDistribution, Total, "Fatalities", "Total"]
(* Out: MixtureDistribution[{0.318663, 0.681337}, {NegativeBinomialDistribution[4, 0.398983], LogSeriesDistribution[0.998502]}] *)
(* Per accident: fatalities divided by people aboard, separately for crew and
   passengers. For each row the "Fatalities" and "Aboard" records are reduced to
   their "Crew"/"Passengers" entries and merged key by key; the merged pair is
   {fatalities, aboard}. The ratio is Missing[] when either number is missing or
   nobody of that category was aboard (which also avoids dividing by zero). *)
accidents[All,
 {"Fatalities", "Aboard"} /* Values /*
  Merge[
   Apply[
    Function[{fatalities, aboard},
     If[MissingQ[fatalities] || MissingQ[aboard] || aboard === 0,
      Missing[],
      fatalities/aboard]]]],
 {"Crew", "Passengers"}]
(* Out (first rows): △ Crew Passengers —— 1 — —— —— —— —— —— —— —— —— 0 1 —— —— —— —— 1 — 1 — 1 — 1 1 1 — *)
▽ showing 1–20 of 5746 8 ��� aviationAccidentData-03.nb
(* Histogram of the per-accident passenger fatality ratio (fatalities / aboard).
   The first query computes the ratio for crew and passengers, giving Missing[]
   when a number is missing or nobody of that category was aboard; the second
   query plots the "Passengers" column on the interval 0..1. *)
accidents[All,
  {"Fatalities", "Aboard"} /* Values /*
   Merge[
    Apply[
     Function[{fatalities, aboard},
      If[MissingQ[fatalities] || MissingQ[aboard] || aboard === 0,
       Missing[],
       fatalities/aboard]]]],
  {"Crew", "Passengers"}][
 Histogram[#, PlotRange -> {{0, 1}, All}] &, "Passengers"]
2500
2000
1500
1000
500
0 0.2 0.4 0.6 0.8 1.0
(* Histogram of the per-accident crew fatality ratio (fatalities / aboard).
   The first query computes the ratio for crew and passengers, giving Missing[]
   when a number is missing or nobody of that category was aboard; the second
   query plots the "Crew" column on the interval 0..1. *)
accidents[All,
  {"Fatalities", "Aboard"} /* Values /*
   Merge[
    Apply[
     Function[{fatalities, aboard},
      If[MissingQ[fatalities] || MissingQ[aboard] || aboard === 0,
       Missing[],
       fatalities/aboard]]]],
  {"Crew", "Passengers"}][
 Histogram[#, PlotRange -> {{0, 1}, All}] &, "Crew"]
4000
3000
2000
1000
0 0.2 0.4 0.6 0.8 1.0
(* Accidents by time of day in half-hour bins; rows without a time are dropped.
   The plotting function is parenthesised because "&" binds more loosely
   than "/*". *)
accidents[All, "Time"][
 DeleteMissing /*
  (DateHistogram[#, Quantity[0.5, "Hours"], PlotRange -> All] &)]
120
100
80
60
40
20
13/4:00 13/8:00 13/12:00 13/16:00 13/20:00 14/0:00 aviationAccidentData-03.nb ���9
(* Histogram of the total number of people aboard per accident. *)
accidents[All, "Aboard", "Total"][Histogram]
1200
1000
800
600
400
200
(* Axis ticks of the preceding histogram: 0 0 20 40 60 80 100 *)

(* Scatter plot of {people aboard, fatalities / people aboard}, one point per
   accident. Rows with a missing number give Missing[] instead of a point.
   aboard === 0 is excluded so the division is always defined.
   NOTE(review): fatalities === 0 is also excluded, as in the original; this
   drops accidents without fatalities from the plot -- confirm that is intended
   and not a leftover from the {fatalities, aboard} variant of this test. *)
accidents[ListPlot,
 {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
  Apply[
   Function[{aboard, fatalities},
    If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
     Missing[],
     {aboard, fatalities/aboard}]]]]
1.0
0.8
0.6
0.4
0.2
20 40 60 80 10 ��� aviationAccidentData-03.nb
(* 2D density histogram of {people aboard, fatalities / people aboard} with bin
   widths 1 and 0.1, restricted to 0..100 aboard. Rows with a missing number
   give Missing[]. aboard === 0 is excluded so the division is always defined.
   NOTE(review): fatalities === 0 is also excluded, as in the original -- confirm
   that dropping zero-fatality accidents is intended. *)
accidents[
 DensityHistogram[#, {{1}, {0.1}},
   PlotRange -> {{0, 100}, {0, 1}},
   AspectRatio -> 1/GoldenRatio] &,
 {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
  Apply[
   Function[{aboard, fatalities},
    If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
     Missing[],
     {aboard, fatalities/aboard}]]]]
1.0
0.8
0.6
0.4
0.2
0.0
0 20 40 60 80 100
(* Mean fatality rate as a function of the number of people aboard.
   1. Build {aboard, fatalities/aboard} per accident (Missing[] when a number is
      missing; aboard === 0 is excluded so the division is defined and no group
      gets key 0).
   2. Drop the missing rows and group the rates by the aboard count.
   3. Average each group and plot the result, using the aboard count as the
      position in a SparseArray.
   NOTE(review): fatalities === 0 is also excluded, as in the original -- confirm
   that dropping zero-fatality accidents is intended. *)
accidents[All,
   {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
    Apply[
     Function[{aboard, fatalities},
      If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
       Missing[],
       {aboard, fatalities/aboard}]]]][
  DeleteMissing /* GroupBy[First -> Last]][
  All, Mean][
 Normal /* SparseArray /* ListPlot]
1.0
0.8
0.6
0.4
0.2
100 200 300 400 500 600 aviationAccidentData-03.nb ���11
(* Mean fatality rate as a function of the number of people aboard, plotted for
   0..200 aboard. Rows are {aboard, fatalities/aboard}; Missing[] rows are
   dropped, rates are grouped by aboard count and averaged. aboard === 0 is
   excluded so the division is defined and no group gets key 0.
   NOTE(review): fatalities === 0 is also excluded, as in the original -- confirm
   that dropping zero-fatality accidents is intended. *)
accidents[All,
   {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
    Apply[
     Function[{aboard, fatalities},
      If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
       Missing[],
       {aboard, fatalities/aboard}]]]][
  DeleteMissing /* GroupBy[First -> Last]][
  All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {{0, 200}, {0, 1}}] &)]
1.0
0.8
0.6
0.4
0.2
(* Axis ticks of the preceding plot: 0.0 0 50 100 150 200 *)

(* Mean fatality rate versus people aboard, with the aboard count rounded to the
   nearest multiple of 3 and shifted by 1 so the smallest bin is never 0.
   Rows are {aboard, fatalities/aboard}; Missing[] rows are dropped, rates are
   grouped by the binned aboard count and averaged; plotted for 0..200.
   aboard === 0 is excluded so the division is always defined.
   NOTE(review): fatalities === 0 is also excluded, as in the original -- confirm
   that dropping zero-fatality accidents is intended.
   NOTE(review): the braces/parentheses of the {1 -> (... &)} part were lost in
   the source and are reconstructed here. *)
accidents[All,
    {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
     Apply[
      Function[{aboard, fatalities},
       If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
        Missing[],
        {aboard, fatalities/aboard}]]]][
   DeleteMissing, {1 -> (Round[#, 3] + 1 &)}][
  GroupBy[First -> Last]][
  All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {{0, 200}, {0, 1}}] &)]
1.0
0.8
0.6
0.4
0.2
0.0 0 50 100 150 200 12 ��� aviationAccidentData-03.nb
(* Mean fatality rate versus people aboard, with the aboard count rounded to the
   nearest multiple of 10 and shifted by 1 so the smallest bin is never 0.
   Rows are {aboard, fatalities/aboard}; Missing[] rows are dropped, rates are
   grouped by the binned aboard count and averaged; the full aboard range is
   plotted. aboard === 0 is excluded so the division is always defined.
   NOTE(review): fatalities === 0 is also excluded, as in the original -- confirm
   that dropping zero-fatality accidents is intended.
   NOTE(review): the braces/parentheses of the {1 -> (... &)} part were lost in
   the source and are reconstructed here. *)
accidents[All,
    {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
     Apply[
      Function[{aboard, fatalities},
       If[MissingQ[aboard] || MissingQ[fatalities] || fatalities === 0 || aboard === 0,
        Missing[],
        {aboard, fatalities/aboard}]]]][
   DeleteMissing, {1 -> (Round[#, 10] + 1 &)}][
  GroupBy[First -> Last]][
  All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {All, {0, 1}}] &)]
1.0
0.8
0.6
0.4
0.2
0 100 200 300 400 500 600
(* Overall fatality rate: total fatalities divided by total people aboard,
   summed over all accidents. *)
N[accidents[
  {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]]]
(* Out: 0.727889 *)
(* Fatality rate (total fatalities / total aboard) restricted to accidents whose
   operator is exactly "Aeroflot". *)
N[accidents[
  Select[#Operator === "Aeroflot" &] /*
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]]]
(* Out: 0.781955 *)
(* Fatality rate (total fatalities / total aboard) restricted to accidents whose
   operator is exactly "United Air Lines". *)
N[accidents[
  Select[#Operator === "United Air Lines" &] /*
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]} /*
   Apply[Divide]]]
(* Out: 0.414228 *)
(* Distinct operator names that contain "Aeroflot". *)
accidents[All, "Operator"][
 Union /* DeleteMissing /* Select[StringContainsQ["Aeroflot"]]]
Aeroflot
Aeroflot/ Aeroflot Aeroflot/ Kazakhstan Aeroflot/Military- Russian Aeroflot/ Military- Russian Air Force Aeroflot Russian International Airways
Aeroflot/ Soviet Air Force
(* Substrings used below to flag an aircraft type as Russian-built. Some entries
   overlap (for example "Irkut"/"IRKUT"/"Irkutsk", "Russia"/"Russian"); the list
   is kept exactly as written. *)
russianPlanes = {"Beriev", "Ilyushin", "Voronezh", "Irkut", "BETA", "Irkutsk",
   "IRKUT", "AviaSTEP", "Russian", "Russia", "Mikoyan", "Sokol", "Sukhoi",
   "Komsomolsk", "Novosibirsk", "Tupolev", "Yakovlev", "Aviastar", "Kazan"};
(* For every operator: the fraction of its accidents (with a known aircraft type)
   whose "AC Type" contains one of the russianPlanes substrings, case-insensitively.
   Operators with no known aircraft type get 0. Result is sorted largest first. *)
accidents[
 GroupBy["Operator"] /* ReverseSort,
 (* Per operator: share of True among the non-missing flags. *)
 Function[flags,
  With[{known = DeleteMissing[flags]},
   If[Length[known] === 0,
    0,
    N[Total[Boole[known]]/Length[known]]]]],
 (* Per accident: True/False for string types; Missing[] values pass through. *)
 "AC Type" /*
  Replace[
   s_String :>
    StringContainsQ[s, Alternatives @@ russianPlanes, IgnoreCase -> True]]]
(* Out (first rows; the table continues below): △ Russian Air Force 1. Sukhoi 1. Red Wings Airlines 1. YAK Service 1. Kogalmavia 1. Silk Way Airlines 1. RusAir Airlines 1. Dagestan Airlines 1. Sun Way 1. Russian Ministry of Interior 1. Aria Airlines 1. Caspian Airlines 1. Pulkovo Airlines 1. Air West 1. Azov Avia Airlines 1. Airline Transport 1. *)
Volga-Avia Express 1. AZAL Cargo Company 1. Euro Asia Aviation 1.
Ukranian-Mediterranean Airlines 1.
showing 1–20 of 2787 ▽ 14 ��� aviationAccidentData-03.nb
(* Aircraft types involved in accidents whose operator is exactly "Aeroflot",
   with counts, most frequent first. *)
accidents[
 Select[#Operator === "Aeroflot" &] /* Counts /* ReverseSort,
 "AC Type"]
(* Out: △ Yakovlev YAK-40 19 Antonov AN-24 13 Ilyushin IL-12 13 Ilyushin IL-14P 11 Tupolev TU-134A 10 Tupolev TU-104B 10 Ilyushin IL-18B 9 Tupolev TU-124 8 Li-2 8 Antonov AN-26 6 Antonov An-24B 6 Ilyushin IL-14 6 Tupolev TU-104A 6 Ilyushin IL-18V 5 Ilyushin IL-18 5 Antonov AN-10 4 Lisunov Li-2 4 Tupolev TU-154B-2 3 Tupolev TU-154B 3 Ilyushin IL-62M 3 *)
showing 1–20 of 105 ▽ aviationAccidentData-03.nb ���15
(* Consistency check: rows where both the passenger and crew counts are known but
   their sum differs from the reported "Total" aboard. *)
accidents[
 Select[
  #Aboard["Total"] =!= #Aboard["Passengers"] + #Aboard["Crew"] &&
    ! MissingQ[#Aboard["Passengers"]] &&
    ! MissingQ[#Aboard["Crew"]] &]]
(* Out (column headers; the rows follow below): △ Date Time Location *)
11 Dec 1919 — Catherham, Surrey, UK 20 Jun 1920 — Cartagena, Columbia 6 Apr 1921 — Point Cook, Australia 12 Jan 1927 — Estaires, France 19 Dec 1929 — Near Berlin, Germany 22 Feb 1938 — Pontoise, France 9 Feb 1938 — Guadalupe, Mexico 13 Jul 1949 — Jamnagar, Gujarat state, India 5 Oct 1952 16:02:00 Skvoritsy, Russia 27 May 1953 03:50:00 Goose Wade, Russia 1 Feb 1959 23:37:00 Kerrville, Texas 9 Jan 1966 17:55:00 Chipaque, Cundinamarca, Colombia 6 Jan 1968 18:27:00 Near Olekminsk, Russia 5 Dec 1969 15:00:00 Nnear Albuquerque, New Mexico 29 Jan 1970 19:27:00 Near Murmansk, Russia 1 Oct 1972 19:25:00 Near Adler, USSR 11 May 1973 04:37:00 Near Semipalatinsk, USSR 30 Jun 1973 — Amman, Jordan 31 Jul 1975 — Taipei, Taiwan ▽ 9 Feb 1976 08:15:00 Irkutsk, Russia
showing 1–20 of 27