/*
 * Page script: AWK Essential Training - Chapter 6 - Formatting the Output.
 *
 * Builds the page by appending title, header, paragraph, syntax and image
 * nodes (created by the imported helper functions) to the ".heading" and
 * ".main_content" containers, then calls addSidebar("linux").
 *
 * awk command lines are written with String.raw so that sequences such as
 * \t and \n reach the page as the literal two-character text the reader is
 * meant to type, instead of being turned into a real tab / line break by the
 * JavaScript string parser.
 */
import { addBanner, addArticle, addTitle, addHeader, addParagraph, addSubHeader } from '/scripts/article.js';
import { addInset, addInsetList, addInsetCodeListing, addInsetBulletList } from '/scripts/inset.js';
import { addImageWithCaption, addButtonGroup } from '/scripts/visuals.js';
import { addSidebar} from '/scripts/sidebar.js';
import { addSyntax } from '/scripts/code.js';
import { menu } from '/scripts/web_dev_buttons.js';
import { global_menu } from '/scripts/grid_layout1.js';
import { local_menu } from '/scripts/linux.js';

// Page regions. NOTE(review): global, local and sidebar are not referenced in
// the visible part of this script; kept in case other code relies on them.
const heading = document.querySelector(".heading");
const global = document.querySelector(".global_menu");
const local = document.querySelector(".local_menu");
const sidebar = document.querySelector(".sidebar");
const main = document.querySelector(".main_content");

// ---- Page heading ----
heading.append(addTitle("AWK Essential Training"));
heading.append(addParagraph("David D. Levine - LinkedIn Learning - November 2022"));
heading.append(addParagraph("Chapter 6 - Formatting the Output"));

// ---- Section: formatting output with printf ----
main.append(addHeader("FORMATTING OUTPUT WITH printf( )"));
main.append(addParagraph("In addition to print, awk also includes the printf function which you might recognise if you are familiar with programming languages such as C or Java. This allows you to format your output and the syntax is pretty similar to the syntax in C or Java."));
main.append(addParagraph("You can use it to format the output from a file to neatly display its contents or to send to a file, you can also use it interactively in order to tidy up your output."));
main.append(addParagraph("The general syntax is"));
main.append(addSyntax("printf(format,value ...)"));
main.append(addParagraph("The parentheses are optional but it's probably a good habit to include them for readability. "));
main.append(addParagraph("The format is a single string and it includes format specifiers such as %s to indicate a string or %d to indicate a decimal number. The value or values correspond to the format specifiers in the same order so if you specify a format that includes two of these, let's say %s followed by %d, you will have two values which will be a string and a decimal number."));
main.append(addParagraph("To give you some idea of how this works, we will take a file"));
main.append(addImageWithCaption("./images/nameemailavg_csv.jpg", "The file we want to format."));
main.append(addParagraph("This is a csv file with a bowler's name, email address and average score. We want to output this in a way that is a little bit easier to read, so we might use a tab character as the output field separator."));
main.append(addParagraph("We have three fields so if we use printf to format the output, we will need three specifiers and three values and we can include the tabs in the format string so we don't need to specify any value for OFS. The command would look something like this."));
// -F, sets the comma as the input field separator (the input is a csv file);
// the trailing \n is the newline the following paragraph refers to.
main.append(addSyntax(String.raw`awk -F, '{printf("%s\t%s\t%d\n", $1, $2, $3)}' nameemailavg.csv`));
main.append(addParagraph("If we break down that format string, it starts with a string, %s, which is replaced by $1. We then have a tab character followed by a second string which will be replaced by $2. Next is another tab character followed by a decimal number, %d, which is replaced by $3 and the string ends with a newline character and the fields, $1, $2 and $3 are provided as values."));
main.append(addParagraph("Unlike print, printf doesn't automatically add a newline character which means that we have to include it if we want the line to terminate with one."));
main.append(addParagraph("Since we know what each field represents in this file, we can say that each line of output will consist of the bowler's name, email address and average and these will be separated by tabs so that the data appears to be arranged in 3 columns as shown below. "));
main.append(addImageWithCaption("./images/nameemailavg_out1.jpg", "Our first attempt to format the output."));
main.append(addParagraph("This is much easier to read than the original file but the columns are not correctly aligned due to the fact that some of the names are a little bit longer which nudges the subsequent fields out of alignment. In the next section, we will look at ways to fix that."));

// ---- Section: width and precision specifiers ----
main.append(addHeader("FORMATTING OUTPUT WITH WIDTH AND PRECISION SPECIFIERS"));
main.append(addParagraph("Along with a format specifier, we can also specify a field width which gives us a little bit more control over the output. We will run the same command again but we will replace the tabs with a width which goes between the % sign and the format specifier. For example, if we want to display the first field with a width of 20, the format specifier, %s, will be changed to"));
main.append(addSyntax("%20s"));
main.append(addParagraph("The command therefore can be written as "));
main.append(addSyntax(String.raw`awk -F, '{printf ("%20s %30s %3d\n", $1, $2, $3)}' nameemailavg.csv`));
main.append(addParagraph("Note that we specified widths of 30 and 3 for $2 and $3 respectively and there is now a space between the format specifiers"));
main.append(addImageWithCaption("./images/nameemailavg_out2.jpg", "Specifying a width in the format specifiers."));
main.append(addParagraph("This looks a little better apart from two things. Firstly, one of the email addresses is a little longer than the others so that line is still slightly out compared to the other lines. Secondly, the text is right-justified which makes it a little harder to read."));
main.append(addParagraph("Note that when we specify a width, such as 30 for field 2 which contains email addresses, this is a minimum so if one of the records has a field that is longer than the specified width, the string will occupy the number of characters required to display the whole field. For example, although we specified a field width of 30, one of the email addresses has a length of 32 characters which puts the next field out of the column."));
main.append(addParagraph("To fix the first issue, we just need to increase the field width, setting it to 35."));
main.append(addParagraph("To fix the second issue, we can specify the widths as negative numbers, so our command becomes"));
main.append(addSyntax(String.raw`awk -F, '{printf ("%-20s %-35s %-3d\n", $1, $2, $3)}' nameemailavg.csv`));
main.append(addParagraph("This gives us the result shown below."));
main.append(addImageWithCaption("./images/nameemailavg_out3.jpg", "The output left-justified."));
main.append(addParagraph("Again, this looks better and is neatly displayed in columns."));
main.append(addParagraph("You might have noticed that the input file shows the averages as floating point numbers, but we have been displaying these as decimal values because we used %d as the format specifier. You might not be too surprised to learn that for a floating point number, the format specifier is %f, so we will make that change and see how this affects the output."));
main.append(addImageWithCaption("./images/nameemailavg_out4.jpg", "Outputting the average scores as floating point numbers."));
main.append(addParagraph("In the original file, the averages are shown to 9 decimal places, but the default for %f is 6 decimal places which is what we see in the output."));
main.append(addParagraph("In reality, two decimal places would be more reasonable. Another issue with this output is that one (and only one) of the bowlers has an average score that is less than 100 which means that it doesn't quite line up with the other averages."));
main.append(addParagraph("To fix both of these issues, we can specify the width of the third field as a floating point number. Let's assume that we want to specify the output as a number to two decimal places and we want the integer part of the number to occupy three spaces. That is 6 characters in total. For instance, the first average is"));
main.append(addSyntax("256.62"));
main.append(addParagraph("which, including the point, is 6 characters. We would specify that as"));
main.append(addSyntax("%6.2f"));
main.append(addParagraph("where 6 is the width of the field as it is output and 2 is the precision - the number of places after the decimal point. So the command becomes"));
main.append(addSyntax(String.raw`awk -F, '{printf ("%-20s %-35s %6.2f\n", $1, $2, $3)}' nameemailavg.csv`));
// Without the "-" flag printf pads on the left, i.e. the value is right-justified.
main.append(addParagraph("Note that this time, we haven't specified a negative sign in the float format specifier so that field will be right-justified."));
main.append(addImageWithCaption("./images/nameemailavg_out5.jpg", "The output nicely formatted with the averages (in the third field) being displayed to 2 decimal places and the column right-justified."));
main.append(addParagraph("If we put a 0 at the start of the width for that floating point number, all of the values in that field will have the same length."));
main.append(addImageWithCaption("./images/nameemailavg_out6.jpg", "The floating point numbers with a leading zero to make all the values in the third field the same length."));
main.append(addParagraph("It is probably a good idea to throw in a reminder at this point that the specified field width is a minimum. That is, each number will occupy (in this case) 6 spaces in total. The first three spaces are occupied by the part of the number before the decimal point, the fourth by the decimal point and the remaining 2 by the part of the number after the decimal point."));
main.append(addParagraph("We don't have to worry about the last part of the number because that part of the number is specified as occupying two spaces and anything after that is discarded."));
main.append(addParagraph("We know that the part of the number before the decimal point will occupy, at most, three spaces so specifying a value of 6.2 will work for all values."));
main.append(addParagraph("However, if we had an average value that is 1000 or more, the field size will not be big enough and so it will display with 7 spaces (more if needed) to fully account for the digits before the decimal point."));
main.append(addParagraph("That won't be a problem here because the maximum value for the average cannot exceed the maximum score which is, I believe, 300."));
main.append(addParagraph("Obviously, the field length was derived from the field lengths in the input file. The thing to remember here is that the field length is selected to fit the data and if you wanted to use this in a real-world application, it is important to remember that you must specify a field length that is big enough to display that field correctly for every value. In addition, if we assume that the input file can be edited then this would likely involve updating the average scores."));
main.append(addParagraph("When selecting an appropriate field width, you should select a value that not only works for all of the current fields but will also work for all possible values."));
main.append(addParagraph("In some cases, that might mean specifying an arbitrarily large number for the width or it might simply be a case of selecting a width that will be large enough for the majority of cases."));
main.append(addParagraph("You might want to check out the AWK documentation for a list of other format specifiers that you can use with printf."));

addSidebar("linux");