(* Import the hyperlinks of the yearly index page for every year from 1920 through 2017. *)
allLinks = Table[
   Import[StringTemplate["http://www.planecrashinfo.com/`1`/`1`.htm"][year], "Hyperlinks"],
   {year, 1920, 2017}];

(* From each year's links keep those matching .../<x>/<y>-<z>.htm, then merge into one flat list. *)
links = Flatten[
   Map[
    Select[StringMatchQ["http://www.planecrashinfo.com/" ~~ __ ~~ "/" ~~ __ ~~ "-" ~~ __ ~~ ".htm"]],
    allLinks]];

(* Folder "htmlFiles" located next to this notebook; the downloaded pages are written to and read from it. *)
dataDirectory = FileNameJoin[{NotebookDirectory[], "htmlFiles"}]

/Users/christopher/Dropbox/mathematica/aviationAccidents/htmlFiles

(* Start one asynchronous download per link, saving each page into dataDirectory
   under the last component of its URL, and wait 0.1 s between submissions.
   The parentheses are required: `&` binds tighter than `;`, so without them only
   Pause[0.1] would be mapped over links and URLSaveAsynchronous would be called
   once with an unbound #. Scan is used because the results are discarded. *)
Scan[
  (URLSaveAsynchronous[#, FileNameJoin[{dataDirectory, FileNameTake@#}], Identity@*List];
   Pause[0.1]) &,
  links];

(* Live display of the number of asynchronous tasks, refreshed every second.
   ASCII -> is used so the option parses as a Rule in plain-text input. *)
Dynamic[Length@AsynchronousTasks[], UpdateInterval -> 1]

(* Build one Association per saved page. The last element of the page's imported
   "Data" is treated as a list of {label, value} pairs; the final character of
   each label is dropped (presumably a trailing ":" - confirm against a page). *)
rawData = Map[
   Function[file,
    Association[
     Apply[StringDrop[#1, -1] -> #2 &] /@ Last@Import[file, "Data"]]],
   FileNames["*htm", dataDirectory]];

(* parseTime[string] converts a raw time field into a TimeObject.
   - "?" yields Missing[].
   - Otherwise the value is converted to a string, ";" is read as ":", and every
     character other than a digit or ":" is removed.
   - If no ":" remains and more than two characters are left, a ":" is inserted
     before the last two characters (e.g. "1345" becomes "13:45").
   - If TimeObject issues a message for the cleaned string, Missing[] is returned. *)
parseTime["?"] := Missing[]
parseTime[string_] := Module[{t = ToString@string},
  t = StringDelete[StringReplace[t, ";" -> ":"], Except[DigitCharacter | ":"]];
  If[StringFreeQ[t, ":"] && StringLength[t] > 2,
   t = StringInsert[t, ":", -3]];
  Quiet@Check[TimeObject@t, Missing[]]
  ]

parsePeoplestring_:= First@StringCasesstring, StartOfString~~t__~~"(passengers:"~~p__~~" crew:"~~c__~~")"~~EndOfString⧴ Replace<|"Total"→t,"Passengers"→p,"Crew"→c|>,n_?StringMatchQNumberString ⧴  2 ��� aviationAccidentData-03.nb

(* Turn the raw per-page associations into a Dataset and clean every column:
   "?" becomes Missing[]; "Date" is otherwise wrapped in DateObject; "Time" goes
   through parseTime; "Aboard" and "Fatalities" go through parsePeople.
   The pure function in the "Date" rule is parenthesised because `&` binds more
   loosely than `->`. *)
accidents = Dataset[rawData][All, {
   "Date" -> (Replace[#, {"?" -> Missing[], _ :> DateObject[#]}] &),
   "Time" -> parseTime,
   "Location" -> Replace["?" -> Missing[]],
   "Operator" -> Replace["?" -> Missing[]],
   "Flight#" -> Replace["?" -> Missing[]],
   "Route" -> Replace["?" -> Missing[]],
   "AC Type" -> Replace["?" -> Missing[]],
   "Registration" -> Replace["?" -> Missing[]],
   "cn/ ln" -> Replace["?" -> Missing[]],
   "Aboard" -> parsePeople,
   "Fatalities" -> parsePeople,
   "Ground" -> Replace["?" -> Missing[]],
   "Summary" -> Replace["?" -> Missing[]]
   }]

△ Date Time Location

24 Sep 1916 01:00:00 Billericay, England 1 Oct 1916 23:45:00 Potters Bar, England 21 Nov 1916 — Mainz, Germany 28 Nov 1916 23:45:00 Off West Hartlepool, England 4 Mar 1917 — Near Gent, Belgium 30 Mar 1917 — Off Northern Germany 14 May 1917 05:15:00 Near Texel Island, North Sea 14 Jun 1917 08:45:00 Off Vlieland Island, North Sea 17 Jun 1917 — Near Yarmouth, England 21 Aug 1917 07:00:00 Off western Denmark 17 Sep 1908 17:18:00 Fort Myer, Virginia 20 Oct 1917 07:45:00 Near Luneville, France 7 Apr 1918 21:30:00 Over the Mediterranean 10 May 1918 — Off Helgoland Island, Germany 11 Aug 1918 10:00:00 Ameland Island, North Sea 16 Dec 1918 — Elizabeth, New Jersey 25 May 1919 — Cleveland, Ohio 19 Jul 1919 — Dix Run, Pennsylvania 2 Aug 1919 — Verona, Italy ▽ 2 Oct 1919 — Newcastle, England

showing 1–20 of 5746 aviationAccidentData-03.nb | 3

(*DumpSave[FileNameJoin[{NotebookDirectory[],"accidents.mx"}], accidents];*)

(* Load the file "accidents.mx" that sits next to this notebook. *)
FileNameJoin[{NotebookDirectory[], "accidents.mx"}] // Get

Basic Analysis

(* Histogram of the "Date" column in one-year bins. *)
accidents[Function[dates, DateHistogram[dates, "Year"]], "Date"]

100

80

60

40

20

1930 1952 1974 1996 2018

(* Same yearly histogram of "Date", with the horizontal axis starting at 1960.
   ASCII -> is used so PlotRange parses as an option rule. *)
accidents[
 DateHistogram[#, "Year", PlotRange -> {{DateObject[{1960}], All}, All}] &,
 "Date"]

100

80

60

40

20

1970 1980 1990 2000 2010 4 | aviationAccidentData-03.nb

accidentsSelect[#Operator === "" &] /* DateHistogram#, 2 yr , PlotRange→{{DateObject[{1960}], All}, All}&, "Date"

20

15

10

5

0 1970 1980 1990 2000 2010

(* Count the accidents per "Operator" value and list the counts in descending order. *)
accidents[RightComposition[Counts, ReverseSort], "Operator"]
△ Aeroflot 260

Military- U.S. Air Force 177 Air France 72 Deutsche Lufthansa 64 United Air Lines 44 China National Aviation Corporation 44

Military- U.S. Army Air Forces 43 Pan American World Airways 41 American Airlines 37

Military- Royal Air Force 36 Military- U.S. Navy 35 US Aerial Mail Service 35 Indian Airlines 34 KLM Royal Dutch Airlines 34 Private 34 Philippine Air Lines 33 Air Taxi 31 British Overseas Airways 29

Military- U.S. Army 27 Eastern Air Lines 25

showing 1–20 of 2787 ▽ aviationAccidentData-03.nb | 5

(* Histogram of the per-operator accident counts. *)
accidents[RightComposition[Counts, Histogram], "Operator"]

2000

1500

1000

500

0 2 4 6 8 10
(* Fit a distribution to the per-operator accident counts. *)
accidents[RightComposition[Counts, Values, FindDistribution], "Operator"]
ZipfDistribution[1.55656]
(* Sum the "Fatalities" -> "Total" values per operator and sort them in descending order. *)
Query[ReverseSort][accidents[GroupBy["Operator"], Total, "Fatalities", "Total"]]
△ Aeroflot 9048

Military- U.S. Air Force 3718 Air France 1748 American Airlines 1422 Pan American World Airways 1303

Military- U.S. Army Air Forces 1070 United Air Lines 1019 AVIANCA 941

Turkish Airlines(THY) 891 Indian Airlines 861

China Airlines(Taiwan) 847 Air India 827 Trans World Airlines 784 Japan Air Lines 764

Military- U.S. Navy 752 Pakistan International Airlines 743 Korean Airlines 712 Eastern Air Lines 710 Malaysia Airlines 671 KLM Royal Dutch Airlines 635

showing 1–20 of 2787 ▽ 6 | aviationAccidentData-03.nb

(* Histogram of the per-operator sums of "Fatalities" -> "Total". *)
accidents[RightComposition[GroupBy["Operator"], Histogram], Total, "Fatalities", "Total"]

1400

1200

1000

800

600

400

200

0 0 20 40 60 80 100

(* Fit a distribution to the per-operator sums of "Fatalities" -> "Total". *)
accidents[
 RightComposition[GroupBy["Operator"], Values, FindDistribution],
 Total, "Fatalities", "Total"]
MixtureDistribution[{0.318663, 0.681337}, {NegativeBinomialDistribution[4, 0.398983], LogSeriesDistribution[0.998502]}]
aviationAccidentData-03.nb | 7

accidentsAll,{"Fatalities", "Aboard"} /* Values/* MergeApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1#2&, {"Crew", "Passengers"} △ Crew Passengers —— 1 — —— —— —— —— —— —— —— —— 0 1 —— —— —— —— 1 — 1 — 1 — 1 1 1 —

▽ showing 1–20 of 5746 8 | aviationAccidentData-03.nb

accidentsAll,{"Fatalities", "Aboard"} /* Values/* MergeApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1#2&, {"Crew", "Passengers"}[Histogram[#, PlotRange→{{0, 1}, All}] &, "Passengers"]

2500

2000

1500

1000

500

0 0.2 0.4 0.6 0.8 1.0

accidentsAll,{"Fatalities", "Aboard"} /* Values/* MergeApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1#2&, {"Crew", "Passengers"}[Histogram[#, PlotRange→{{0, 1}, All}] &, "Crew"]

4000

3000

2000

1000

0 0.2 0.4 0.6 0.8 1.0

(* Histogram of the non-missing "Time" values in half-hour bins.
   The pure function is parenthesised because `&` binds more loosely than `/*`. *)
accidents[All, "Time"][
 DeleteMissing /* (DateHistogram[#, Quantity[0.5, "Hours"], PlotRange -> All] &)]

120

100

80

60

40

20

13/4:00 13/8:00 13/12:00 13/16:00 13/20:00 14/0:00 aviationAccidentData-03.nb | 9

(* Histogram of the "Aboard" -> "Total" values. *)
Query[Histogram, "Aboard", "Total"][accidents]

1200

1000

800

600

400

200

0 0 20 40 60 80 100
(* Scatter plot of {total aboard, total fatalities / total aboard} per accident.
   #1 is the aboard total and #2 the fatalities total. A row becomes Missing[]
   when either value is missing or 0. The #1 === 0 test protects the division;
   the #2 === 0 test is kept from the original.
   NOTE(review): #2 === 0 also drops accidents with zero fatalities - confirm
   that this exclusion is intended. *)
accidents[ListPlot,
 {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
  Apply[If[MissingQ[#1] || MissingQ[#2] || #1 === 0 || #2 === 0,
     Missing[], {#1, #2/#1}] &]]

1.0

0.8

0.6

0.4

0.2

20 40 60 80 10 | aviationAccidentData-03.nb

accidentsDensityHistogram#,{{1},{0.1}}, PlotRange→{{0, 100},{0, 1}}, AspectRatio→1 GoldenRatio&, {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* ApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1,#2#1&

1.0

0.8

0.6

0.4

0.2

0.0

0 20 40 60 80 100

accidentsAll,{Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* ApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1,#2#1&[ DeleteMissing/* GroupBy[First→ Last]][All, Mean][ Normal/* SparseArray/* ListPlot]

1.0

0.8

0.6

0.4

0.2

100 200 300 400 500 600 aviationAccidentData-03.nb | 11

accidentsAll,{Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* ApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1,#2#1&[ DeleteMissing/* GroupBy[First→ Last]][All, Mean] Normal/* SparseArray/*ListPlot[#, PlotRange→{{0, 200},{0, 1}}]&

1.0

0.8

0.6

0.4

0.2

0.0 0 50 100 150 200
(* Mean fatality ratio with the aboard count binned: each aboard value is rounded
   to the nearest multiple of 3 and shifted by 1 before grouping. Plotted for
   0..200 and ratios 0..1.
   Rows become Missing[] when either value is missing or 0 (#1 === 0 protects the
   division; #2 === 0 is kept from the original - NOTE(review): it also drops
   zero-fatality accidents). *)
accidents[All,
    {Query["Aboard", "Total"], Query["Fatalities", "Total"]} /*
     Apply[If[MissingQ[#1] || MissingQ[#2] || #1 === 0 || #2 === 0,
        Missing[], {#1, #2/#1}] &]][
   DeleteMissing, {1 -> (Round[#, 3] + 1 &)}][
  GroupBy[First -> Last]][All, Mean][
 Normal /* SparseArray /* (ListPlot[#, PlotRange -> {{0, 200}, {0, 1}}] &)]

1.0

0.8

0.6

0.4

0.2

0.0 0 50 100 150 200 12 | aviationAccidentData-03.nb

accidentsAll,{Query["Aboard", "Total"], Query["Fatalities", "Total"]} /* ApplyIfMissingQ[#1] || MissingQ[#2] || #2 === 0, Missing[],#1,#2#1& DeleteMissing,1→Round[#, 10] +1&[GroupBy[First→ Last]][All, Mean] Normal/* SparseArray/*ListPlot[#, PlotRange→{All,{0, 1}}]&

1.0

0.8

0.6

0.4

0.2

0 100 200 300 400 500 600

(* Sum of all "Fatalities" totals divided by the sum of all "Aboard" totals, as a machine number. *)
N[accidents[
  RightComposition[
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]},
   Apply[Divide]]]]
0.727889

(* Summed fatalities divided by summed aboard, restricted to rows whose operator is exactly "Aeroflot". *)
N[accidents[
  RightComposition[
   Select[#Operator === "Aeroflot" &],
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]},
   Apply[Divide]]]]
0.781955

(* Summed fatalities divided by summed aboard, restricted to rows whose operator is exactly "United Air Lines". *)
N[accidents[
  RightComposition[
   Select[#Operator === "United Air Lines" &],
   {Query[Total, "Fatalities", "Total"], Query[Total, "Aboard", "Total"]},
   Apply[Divide]]]]
0.414228

(* Distinct, non-missing operator names that contain the substring "Aeroflot". *)
accidents[
 RightComposition[Union, DeleteMissing, Select[StringContainsQ["Aeroflot"]]],
 "Operator"]

Aeroflot

Aeroflot/ Aeroflot Aeroflot/ Kazakhstan Aeroflot/Military- Russian Aeroflot/ Military- Russian Air Force Aeroflot Russian International Airways

Aeroflot/ Soviet Air Force

(* Name fragments that are matched, case-insensitively, against the "AC Type"
   column elsewhere in this notebook. *)
russianPlanes = {
   "Beriev", "Ilyushin", "Voronezh", "Irkut", "BETA", "Irkutsk", "IRKUT",
   "AviaSTEP", "Russian", "Russia", "Mikoyan", "Sokol", "Sukhoi",
   "Komsomolsk", "Novosibirsk", "Tupolev", "Yakovlev", "Aviastar", "Kazan"};
aviationAccidentData-03.nb | 13

accidentsGroupBy["Operator"] /* ReverseSort, IfLength@DeleteMissing@# === 0, 0, NTotal@Boole@DeleteMissing@# Length@DeleteMissing@#&, "AC Type"/* Replace[ s_String⧴ StringContainsQ[s, Alternatives@@ russianPlanes, IgnoreCase→ True]] △ Russian Air Force 1. Sukhoi 1. 1. YAK Service 1. Kogalmavia 1. Silk Way Airlines 1. RusAir Airlines 1. Dagestan Airlines 1. Sun Way 1. Russian Ministry of Interior 1. Aria Airlines 1. Caspian Airlines 1. Pulkovo Airlines 1. Air West 1. Azov Avia Airlines 1. Airline Transport 1.

Volga-Avia Express 1. AZAL Cargo Company 1. Euro Asia Aviation 1.

Ukranian-Mediterranean Airlines 1.

showing 1–20 of 2787 ▽ 14 | aviationAccidentData-03.nb

(* Aircraft types involved in accidents whose operator is exactly "Aeroflot", counted and sorted in descending order. *)
accidents[
 RightComposition[Select[#Operator === "Aeroflot" &], Counts, ReverseSort],
 "AC Type"]
△ Yakovlev YAK-40 19 Antonov AN-24 13 Ilyushin IL-12 13 Ilyushin IL-14P 11 Tupolev TU-134A 10 Tupolev TU-104B 10 Ilyushin IL-18B 9 Tupolev TU-124 8 Li-2 8 Antonov AN-26 6 Antonov An-24B 6 Ilyushin IL-14 6 Tupolev TU-104A 6 Ilyushin IL-18V 5 Ilyushin IL-18 5 Antonov AN-10 4 Lisunov Li-2 4 Tupolev TU-154B-2 3 Tupolev TU-154B 3 Ilyushin IL-62M 3

showing 1–20 of 105 ▽ aviationAccidentData-03.nb | 15

(* Rows in which the "Aboard" total differs from passengers + crew while both
   component figures are present. *)
accidents[
 Select[
  And[
    #Aboard["Total"] =!= #Aboard["Passengers"] + #Aboard["Crew"],
    Not[MissingQ[#Aboard["Passengers"]]],
    Not[MissingQ[#Aboard["Crew"]]]] &]]
△ Date Time Location

11 Dec 1919 — Catherham, Surrey, UK 20 Jun 1920 — Cartagena, Columbia 6 Apr 1921 — Point Cook, Australia 12 Jan 1927 — Estaires, France 19 Dec 1929 — Near Berlin, Germany 22 Feb 1938 — Pontoise, France 9 Feb 1938 — Guadalupe, Mexico 13 Jul 1949 — Jamnagar, Gujarat state, India 5 Oct 1952 16:02:00 Skvoritsy, Russia 27 May 1953 03:50:00 Goose Wade, Russia 1 Feb 1959 23:37:00 Kerrville, Texas 9 Jan 1966 17:55:00 Chipaque, Cundinamarca, Colombia 6 Jan 1968 18:27:00 Near Olekminsk, Russia 5 Dec 1969 15:00:00 Nnear Albuquerque, New Mexico 29 Jan 1970 19:27:00 Near Murmansk, Russia 1 Oct 1972 19:25:00 Near Adler, USSR 11 May 1973 04:37:00 Near Semipalatinsk, USSR 30 Jun 1973 — Amman, Jordan 31 Jul 1975 — Taipei, Taiwan ▽ 9 Feb 1976 08:15:00 Irkutsk, Russia

showing 1–20 of 27