Get your flow on – Dataflows – 02-02

one big challenge in processing the results for an event in a lasting dataset is that you have to be able to store historic results as you are not able to read them in from the website. For security, they block constant read attempts. So you need a storage layer for our video series we will be using Power BI Dataflows, however, there are many other options available to you depending on the technologies you are familiar with.

YouTube player

REMEMBER: In order to build this solution for a parkrun event and be able to share it, you will need to have a Power BI Pro subscription (or higher), you can build this for yourself using a free workspace, but you won’t be able to share it and you can also follow along with the steps either in Power BI desktop (you won’t be able to scale up through) or Excel with Power Query.

In today’s video, we build the basic component of results read in. I like to build my dataflow in Power BI Desktop and then copy the code through from the advanced editor, the reason for this is that Desktop tends to have more resources so you can develop quicker, however, you have to be aware that some things from Power BI Desktop do not work in Power BI Dataflows Web.BrowserContents ( ) is a perfect example and need to be changed to Web.Contents ( ). Be prepared to hit these obstacles and remediate them, however, being able to use your local processing resources is often quicker.

Basic build is below

let
  Source = Web.Contents("https://www.parkrun.org.uk/jesmonddene/results/latestresults/"),
  #"Extracted Table From Html" = Html.Table(Source, {{"Column1", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(1)"}, {"Column2", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(2)"}, {"Column3", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(3)"}, {"Column4", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(4)"}, {"Column5", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(5)"}, {"Column6", "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR > :nth-child(6)"}}, [RowSelector = "TABLE.Results-table.Results-table\-\-compact.js-ResultsTable > * > TR"]),
  #"Promoted Headers" = Table.PromoteHeaders(#"Extracted Table From Html", [PromoteAllScalars = true]),
  #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers", {{"Position", Int64.Type}, {"parkrunner", type text}, {"Gender", type text}, {"Age Group", type text}, {"Club", type text}, {"Time", type text}}),
  #"Split Column by Delimiter" = Table.SplitColumn(#"Changed Type", "parkrunner", Splitter.SplitTextByEachDelimiter({"|"}, QuoteStyle.Csv), {"parkrunner.1", "parkrunner.2"}),
  #"Changed Type1" = Table.TransformColumnTypes(#"Split Column by Delimiter", {{"parkrunner.1", type text}, {"parkrunner.2", type text}}),
  #"Split Column by Character Transition" = Table.SplitColumn(#"Changed Type1", "parkrunner.1", Splitter.SplitTextByCharacterTransition((c) => not List.Contains({"0".."9"}, c), {"0".."9"}), {"parkrunner.1.1", "parkrunner.1.2"}),
  #"Renamed Columns" = Table.RenameColumns(#"Split Column by Character Transition", {{"parkrunner.1.1", "Person"}}),
  #"Split Column by Character Transition1" = Table.SplitColumn(#"Renamed Columns", "parkrunner.1.2", Splitter.SplitTextByCharacterTransition({"0".."9"}, (c) => not List.Contains({"0".."9"}, c)), {"parkrunner.1.2.1", "parkrunner.1.2.2"}),
  #"Trimmed Text" = Table.TransformColumns(#"Split Column by Character Transition1",{{"parkrunner.1.2.1", Text.Trim, type text}}),
  #"Renamed Columns1" = Table.RenameColumns(#"Trimmed Text", {{"parkrunner.1.2.1", "parkruns Completed"}}),
  #"Removed Columns" = Table.RemoveColumns(#"Renamed Columns1", {"parkrunner.1.2.2"}),
  #"Split Column by Delimiter1" = Table.SplitColumn(#"Removed Columns", "Gender", Splitter.SplitTextByEachDelimiter({" "}, QuoteStyle.Csv), {"Gender.1", "Gender.2"}),
  #"Changed Type2" = Table.TransformColumnTypes(#"Split Column by Delimiter1", {{"Person", type text}, {"Gender.1", type text}, {"Gender.2", Int64.Type}}),
  #"Trimmed Text1" = Table.TransformColumns(Table.TransformColumnTypes(#"Changed Type2", {{"Gender.2", type text}}, "en-GB"),{{"Gender.1", Text.Trim, type text}, {"Gender.2", Text.Trim, type text}}),
  #"Renamed Columns2" = Table.RenameColumns(#"Trimmed Text1", {{"Gender.2", "Gender Position"}, {"Gender.1", "Gender"}}),
  #"Split Column by Delimiter2" = Table.SplitColumn(#"Renamed Columns2", "parkrunner.2", Splitter.SplitTextByEachDelimiter({"|"}, QuoteStyle.Csv, true), {"parkrunner.2.1", "parkrunner.2.2"}),
  #"Changed Type3" = Table.TransformColumnTypes(#"Split Column by Delimiter2", {{"parkrunner.2.1", type text}, {"parkrunner.2.2", type text}}),
  #"Split Column by Delimiter3" = Table.SplitColumn(#"Changed Type3", "parkrunner.2.1", Splitter.SplitTextByEachDelimiter({"|"}, QuoteStyle.Csv, true), {"parkrunner.2.1.1", "parkrunner.2.1.2"}),
  #"Changed Type4" = Table.TransformColumnTypes(#"Split Column by Delimiter3", {{"parkrunner.2.1.1", type text}, {"parkrunner.2.1.2", type text}}),
  #"Removed Columns1" = Table.RemoveColumns(#"Changed Type4", {"parkrunner.2.1.1", "parkrunner.2.1.2"}),
  #"Split Column by Position" = Table.SplitColumn(#"Removed Columns1", "Age Group", Splitter.SplitTextByPositions({0, 16}, true), {"Age Group.1", "Age Group.2"}),
  #"Changed Type5" = Table.TransformColumnTypes(#"Split Column by Position", {{"Age Group.1", type text}, {"Age Group.2", type text}}),
  #"Renamed Columns3" = Table.RenameColumns(#"Changed Type5", {{"Age Group.1", "Age Category"}, {"Age Group.2", "Age Grade"}}),
  #"Split Column by Position1" = Table.SplitColumn(#"Renamed Columns3", "Age Grade", Splitter.SplitTextByPositions({0, 10}, true), {"Age Grade.1", "Age Grade.2"}),
  #"Changed Type6" = Table.TransformColumnTypes(#"Split Column by Position1", {{"Age Grade.1", Percentage.Type}, {"Age Grade.2", type text}}),
  #"Removed Columns2" = Table.RemoveColumns(#"Changed Type6", {"Age Grade.2"}),
  #"Renamed Columns4" = Table.RenameColumns(#"Removed Columns2", {{"Age Grade.1", "Age PCT"}}),
  #"Added Custom Column" = Table.AddColumn(#"Renamed Columns4", "FinishTime", each Text.Combine({"00:", Text.Start([Time], 5)}), type text),
  #"Changed Type7" = Table.TransformColumnTypes(#"Added Custom Column", {{"FinishTime", type time}}),
  #"Replaced Errors" = Table.ReplaceErrorValues(#"Changed Type7", {{"FinishTime", #time(0, 0, 0)}})
in
  #"Replaced Errors"

Leave a Reply