MODFLOW 6  version 6.7.0.dev3
USGS Modular Hydrologic Model
Profiler.f90
Go to the documentation of this file.
2  use kindmodule, only: i4b, dp, lgp
10  implicit none
11  private
12 
13  ! constants for memory allocation
14  integer(I4B), parameter :: max_sections_per_sln = 40
15  integer(I4B), public, parameter :: len_section_title = 128
16 
17  ! data structure to store measurements for a section
18  type, private :: measuredsectiontype
19  character(len=LEN_SECTION_TITLE) :: title !< title to identify timed section in log
20  real(dp) :: walltime !< walltime spent in section
21  integer(I4B) :: count !< number of times section was entered
22  integer(I4B) :: status !< =1 means section timer started, =0 otherwise
23  integer(I4B) :: parent_id !< id of parent, or 0 when root
24  type(stlvecint) :: children !< ids of children
25  end type measuredsectiontype
26 
27  !> @brief A public type for profiling performance in the application.
28  !! The ProfilerType is used to measure and record the performance of various
29  !! parts of the application. It provides mechanisms to start, stop, and
30  !< report on the performance metrics collected during execution.
31  type, public :: profilertype
32  real(dp) :: sim_start_time !< the simulation start time which lies before initialization of the profiler
33  ! handles for the global simulation structure (with no simulation objects to store them)
34  integer(I4B) :: tmr_run !< handle to timed section "Run"
35  integer(I4B) :: tmr_init !< handle to timed section "Initialize"
36  integer(I4B) :: tmr_update !< handle to timed section "Update"
37  integer(I4B) :: tmr_finalize !< handle to timed section "Finalize"
38  integer(I4B) :: tmr_prep_tstp !< handle to timed section "Prepare time step"
39  integer(I4B) :: tmr_do_tstp !< handle to timed section "Do time step"
40  integer(I4B) :: tmr_final_tstp !< handle to timed section "Finalize time step"
41  integer(I4B) :: tmr_output !< handle to timed section "Write output"
42  integer(I4B) :: tmr_nc_export !< handle to timed section "NetCDF export"
43  ! private
44  integer(I4B), private :: iout !< output unit number, typically simulation listing file
45  integer(I4B), private :: pr_option !< 0 = NONE, 1 = SUMMARY, 2 = DETAIL
46  integer(I4B), private :: nr_sections !< number of sections
47  integer(I4B), private, dimension(3) :: top_three !< top three leaf sections based on walltime
48  integer(I4B), private :: max_title_len !< maximum title length
49  integer(I4B), private :: root_id !< currently only one root section is supported, this is the id
50  type(measuredsectiontype), dimension(:), pointer :: all_sections => null() !< all timed sections (up to MAX_NR_TIMED_SECTIONS)
51  type(stlstackint) :: callstack !< call stack of section ids
52  contains
53  procedure :: pre_init
54  procedure :: initialize
55  procedure :: start
56  procedure :: stop
57  procedure :: print
58  procedure :: destroy
59  ! private
60  procedure, private :: set_print_option
61  procedure, private :: add_section
62  procedure, private :: print_section
63  procedure, private :: print_total
64  procedure, private :: aggregate_walltime
65  procedure, private :: aggregate_counts
66  procedure, private :: largest_title_length
67  procedure, private :: sort_by_walltime
68  end type profilertype
69 
70  type(profilertype), public :: g_prof !< the global timer object (to reduce trivial lines of code)
71 
72 contains
73 
74  !< @brief To save the start time before initialization
75  subroutine pre_init(this)
76  class(profilertype) :: this
77 
78  call cpu_time(this%sim_start_time)
79 
80  end subroutine pre_init
81 
82  !< @brief Initialize the CPU timer object
83  !<
84  subroutine initialize(this)
85  class(profilertype) :: this
86  ! local
87  character(len=LENMEMPATH) :: input_mempath
88  type(characterstringtype), dimension(:), contiguous, &
89  pointer :: slntype
90  character(len=:), pointer :: prprof
91  integer(I4B) :: i
92  integer(I4B) :: max_sections
93  integer(I4B) :: nr_solutions
94 
95  this%tmr_run = -1
96  this%tmr_init = -1
97  this%tmr_update = -1
98  this%tmr_finalize = -1
99  this%tmr_prep_tstp = -1
100  this%tmr_do_tstp = -1
101  this%tmr_final_tstp = -1
102  this%tmr_output = -1
103  this%tmr_nc_export = -1
104 
105  call this%callstack%init()
106 
107  ! get nr of solutions from input context
108  input_mempath = create_mem_path('SIM', 'NAM', idm_context)
109  call mem_setptr(slntype, 'SLNTYPE', input_mempath)
110  nr_solutions = size(slntype)
111  call mem_setptr(prprof, 'PRPROF', input_mempath)
112  call this%set_print_option(prprof)
113 
114  max_sections = max_sections_per_sln * nr_solutions
115  allocate (this%all_sections(max_sections))
116  do i = 1, max_sections
117  this%all_sections(i)%title = "undefined"
118  this%all_sections(i)%status = 0
119  this%all_sections(i)%walltime = dzero
120  this%all_sections(i)%count = 0
121  this%all_sections(i)%parent_id = 0
122  call this%all_sections(i)%children%init()
123  end do
124 
125  this%nr_sections = 0
126  this%root_id = 0
127  this%top_three = [0, 0, 0]
128 
129  ! start root section here with previously recorded start time
130  if (this%pr_option > 0) then
131  call this%start("Run", this%tmr_run)
132  this%all_sections(this%tmr_run)%walltime = -this%sim_start_time
133  end if
134 
135  end subroutine initialize
136 
137  !> @brief Add a new timed section to the tree,
138  !! passing the parent id will add it as a child
139  !< in the tree
140  function add_section(this, title, parent_id) result(section_id)
141  use simmodule, only: ustop
142  class(profilertype) :: this
143  character(len=*) :: title
144  integer(I4B) :: parent_id
145  integer(I4B) :: section_id
146 
147  ! increment to new section id
148  this%nr_sections = this%nr_sections + 1
149  section_id = this%nr_sections
150  if (section_id > size(this%all_sections)) then
151  write (*, *) "Internal error: Too many profiled sections, "&
152  &"disable profiling to circumvent."
153  call ustop()
154  end if
155 
156  ! initialize new section
157  this%all_sections(section_id)%title = title
158  this%all_sections(section_id)%walltime = dzero
159  this%all_sections(section_id)%status = 0
160 
161  ! if parent, otherwise root section
162  if (parent_id > 0) then
163  ! add child to parent
164  this%all_sections(section_id)%parent_id = parent_id
165  call this%all_sections(parent_id)%children%push_back(section_id)
166  else
167  ! this is the root, assume there's only one!
168  this%all_sections(section_id)%parent_id = 0
169  this%root_id = section_id
170  end if
171 
172  end function add_section
173 
174  !> @brief Start section timing, add when not exist yet (i.e. when id < 1)
175  !<
176  subroutine start(this, title, section_id)
177  class(profilertype) :: this
178  character(len=*) :: title
179  integer(I4B) :: section_id
180  ! local
181  integer(I4B) :: parent_id
182  real(DP) :: start_time
183  type(measuredsectiontype), pointer :: section
184 
185  if (this%pr_option == 0) return
186 
187  call cpu_time(start_time)
188 
189  if (section_id == -1) then
190  ! add section if not exist
191  parent_id = 0 ! root
192  if (this%callstack%size() > 0) then
193  parent_id = this%callstack%top()
194  end if
195  section_id = this%add_section(title, parent_id)
196  end if
197  call this%callstack%push(section_id)
198 
199  section => this%all_sections(section_id)
200  section%count = section%count + 1
201  section%status = 1
202  section%walltime = section%walltime - start_time
203 
204  end subroutine start
205 
206  subroutine stop(this, section_id)
207  class(profilertype) :: this
208  integer(I4B) :: section_id
209  ! local
210  real(DP) :: end_time
211  type(measuredsectiontype), pointer :: section
212 
213  if (this%pr_option == 0) return
214 
215  call cpu_time(end_time)
216 
217  ! nett result (c.f. start(...)) is adding (dt = end_time - start_time)
218  section => this%all_sections(section_id)
219  section%status = 0
220  section%walltime = section%walltime + end_time
221 
222  ! pop from call stack
223  call this%callstack%pop()
224 
225  end subroutine stop
226 
227  subroutine print(this, output_unit)
228  class(profilertype) :: this
229  integer(I4B), intent(in) :: output_unit
230  ! local
231  integer(I4B) :: level, i, top_idx
232  integer(I4B), dimension(:), allocatable :: sorted_idxs
233 
234  if (this%pr_option == 0) return
235 
236  this%iout = output_unit
237 
238  ! get top three leaf sections based on walltime
239  top_idx = 1
240  sorted_idxs = (/(i, i=1, this%nr_sections)/)
241  call this%sort_by_walltime(sorted_idxs)
242  do i = 1, this%nr_sections
243  if (this%all_sections(sorted_idxs(i))%children%size == 0) then ! leaf node
244  if (top_idx > 3) exit
245  this%top_three(top_idx) = sorted_idxs(i)
246  top_idx = top_idx + 1
247  end if
248  end do
249 
250  this%max_title_len = this%largest_title_length()
251 
252  if (this%pr_option > 1) then
253  ! print timing call stack
254  level = 0
255  write (this%iout, '(/1x,a/)') &
256  repeat('-', 18)//" Profiler: Call Stack "//repeat('-', 18)
257  call this%print_section(this%root_id, level)
258  end if
259 
260  ! print walltime per category from substring (if exist)
261  ! note: the sections containing the substring should not be nested,
262  ! otherwise the walltime will be counted multiple times
263  write (this%iout, '(1x,a/)') &
264  repeat('-', 20)//" Profiler: Totals "//repeat('-', 20)
265  call this%print_total("Formulate")
266  call this%print_total("Linear solve")
267  call this%print_total("Calculate flows")
268  call this%print_total("Calculate budgets")
269  call this%print_total("Write output")
270  call this%print_total("Parallel Solution")
271  call this%print_total("MPI_WaitAll")
272  write (this%iout, '(/1x,a/)') &
273  repeat('-', 22)//" End Profiler "//repeat('-', 22)
274 
275  end subroutine print
276 
277  recursive subroutine print_section(this, section_id, level)
278  use arrayhandlersmodule, only: ifind
279  class(profilertype) :: this
280  integer(I4B) :: section_id
281  integer(I4B) :: level
282  ! local
283  integer(I4B) :: i, new_level, nr_padding, idx_top
284  real(dp) :: percent
285  type(measuredsectiontype), pointer :: section
286  character(len=:), allocatable :: title_padded
287  character(len=LINELENGTH) :: top_marker
288 
289  section => this%all_sections(section_id)
290 
291  ! calculate percentage
292  percent = 1.0_dp
293  if (section%parent_id /= 0) then
294  percent = section%walltime / this%all_sections(this%root_id)%walltime
295  end if
296  percent = percent * 100.0_dp
297 
298  ! determine if section should be marked as top three
299  top_marker = ""
300  idx_top = ifind(this%top_three, section_id)
301  if (idx_top > 0) then
302  nr_padding = max(0, 32 - level * 4)
303  write (top_marker, '(a,i0)') repeat(" ", nr_padding)//"<== #", idx_top
304  end if
305 
306  ! print section timing
307  nr_padding = this%max_title_len - len_trim(section%title) + 2
308  title_padded = trim(section%title)//":"//repeat(' ', nr_padding)
309  write (this%iout, '(3a,f6.2,2a,f14.6,2a,i0,a,a)') " ", &
310  repeat('....', level), "[", percent, "%] ", title_padded, &
311  section%walltime, "s", " (", section%count, "x)", trim(top_marker)
312 
313  ! print children
314  new_level = level + 1
315  do i = 1, section%children%size
316  call this%print_section(section%children%at(i), new_level)
317  end do
318 
319  if (level == 0) write (this%iout, *)
320 
321  end subroutine print_section
322 
323  subroutine print_total(this, subtitle)
324  class(profilertype) :: this
325  character(len=*) :: subtitle
326  ! local
327  integer(I4B) :: count
328  real(DP) :: walltime, percent
329  integer(I4B) :: nr_padding
330  character(len=:), allocatable :: title_padded
331 
332  ! get maximum length of title
333  nr_padding = this%max_title_len - len_trim(subtitle)
334  title_padded = trim(subtitle)//repeat(' ', nr_padding)
335 
336  count = this%aggregate_counts(subtitle)
337  if (count > 0) then
338  walltime = aggregate_walltime(this, subtitle)
339  percent = (walltime / this%all_sections(this%root_id)%walltime) * 100.0_dp
340  write (this%iout, '(2a,f6.2,3a,f14.6,2a,i0,a)') " ", "[", percent, &
341  "%] ", title_padded, ": ", walltime, "s", " (", count, "x)"
342  end if
343 
344  end subroutine print_total
345 
346  !> @brief Aggregate walltime over sections with a certain title
347  !<
348  function aggregate_walltime(this, title) result(walltime)
349  class(profilertype) :: this
350  character(len=*) :: title
351  real(dp) :: walltime
352  ! local
353  integer(I4B) :: i
354 
355  walltime = dzero
356  do i = 1, this%nr_sections
357  if (index(this%all_sections(i)%title, trim(title)) > 0) then
358  walltime = walltime + this%all_sections(i)%walltime
359  end if
360  end do
361 
362  end function aggregate_walltime
363 
364  !> @brief Aggregate counts over sections with a certain title
365  !<
366  function aggregate_counts(this, title) result(counts)
367  class(profilertype) :: this
368  character(len=*) :: title
369  integer(I4B) :: counts
370  ! local
371  integer(I4B) :: i
372 
373  counts = 0
374  do i = 1, this%nr_sections
375  if (index(this%all_sections(i)%title, trim(title)) > 0) then
376  counts = counts + this%all_sections(i)%count
377  end if
378  end do
379 
380  end function aggregate_counts
381 
382  !> @brief Set the profile option from the user input
383  !<
384  subroutine set_print_option(this, profile_option)
385  class(profilertype) :: this
386  character(len=*), intent(in) :: profile_option
387 
388  select case (trim(profile_option))
389  case ("NONE")
390  this%pr_option = 0
391  case ("SUMMARY")
392  this%pr_option = 1
393  case ("DETAIL")
394  this%pr_option = 2
395  case default
396  this%pr_option = 0
397  end select
398 
399  end subroutine set_print_option
400 
401  !> @brief Clean up the CPU timer object
402  !<
403  subroutine destroy(this)
404  class(profilertype) :: this
405  ! local
406  integer(I4B) :: i
407 
408  call this%callstack%destroy()
409 
410  do i = 1, size(this%all_sections)
411  call this%all_sections(i)%children%destroy()
412  end do
413  deallocate (this%all_sections)
414  nullify (this%all_sections)
415 
416  end subroutine destroy
417 
418  !> @brief Calculate the largest title length
419  !<
420  function largest_title_length(this) result(max_length)
421  class(profilertype) :: this
422  integer(I4B) :: max_length
423  integer(I4B) :: i
424 
425  max_length = 0
426  do i = 1, this%nr_sections
427  max_length = max(max_length, len_trim(this%all_sections(i)%title))
428  end do
429 
430  end function largest_title_length
431 
432  !> @brief Sort section indexes based on walltime
433  !<
434  subroutine sort_by_walltime(this, idxs)
435  class(profilertype) :: this
436  integer(I4B), dimension(:), allocatable :: idxs !< array with unsorted section idxs
437  integer(I4B) :: i, j, temp
438 
439  ! Simple bubble sort for demonstration purposes
440  do i = 1, size(idxs) - 1
441  do j = 1, size(idxs) - i
442  if (this%all_sections(idxs(j))%walltime < &
443  this%all_sections(idxs(j + 1))%walltime) then
444  temp = idxs(j)
445  idxs(j) = idxs(j + 1)
446  idxs(j + 1) = temp
447  end if
448  end do
449  end do
450 
451  end subroutine sort_by_walltime
452 
453 end module profilermodule
subroutine destroy(this)
Definition: CharString.f90:132
This module contains simulation constants.
Definition: Constants.f90:9
integer(i4b), parameter linelength
maximum length of a standard line
Definition: Constants.f90:45
real(dp), parameter dnodata
real no data constant
Definition: Constants.f90:95
real(dp), parameter dzero
real constant zero
Definition: Constants.f90:65
integer(i4b), parameter lenmempath
maximum length of the memory path
Definition: Constants.f90:27
This module defines variable data types.
Definition: kind.f90:8
character(len=lenmempath) function create_mem_path(component, subcomponent, context)
returns the path to the memory object
type(profilertype), public g_prof
the global timer object (to reduce trivial lines of code)
Definition: Profiler.f90:70
integer(i4b) function aggregate_counts(this, title)
Aggregate counts over sections with a certain title.
Definition: Profiler.f90:367
integer(i4b), parameter max_sections_per_sln
Definition: Profiler.f90:14
real(dp) function aggregate_walltime(this, title)
Aggregate walltime over sections with a certain title.
Definition: Profiler.f90:349
subroutine pre_init(this)
Definition: Profiler.f90:76
subroutine sort_by_walltime(this, idxs)
Sort section indexes based on walltime.
Definition: Profiler.f90:435
integer(i4b), parameter, public len_section_title
Definition: Profiler.f90:15
subroutine set_print_option(this, profile_option)
Set the profile option from the user input.
Definition: Profiler.f90:385
subroutine print(this, output_unit)
Definition: Profiler.f90:228
integer(i4b) function largest_title_length(this)
Calculate the largest title length.
Definition: Profiler.f90:421
subroutine initialize(this)
Definition: Profiler.f90:85
integer(i4b) function add_section(this, title, parent_id)
Add a new timed section to the tree, passing the parent id will add it as a child.
Definition: Profiler.f90:141
recursive subroutine print_section(this, section_id, level)
Definition: Profiler.f90:278
subroutine start(this, title, section_id)
Start section timing, add when not exist yet (i.e. when id < 1)
Definition: Profiler.f90:177
subroutine stop(this, section_id)
Definition: Profiler.f90:207
subroutine print_total(this, subtitle)
Definition: Profiler.f90:324
This module contains simulation methods.
Definition: Sim.f90:10
subroutine, public ustop(stopmess, ioutlocal)
Stop the simulation.
Definition: Sim.f90:312
This module contains simulation variables.
Definition: SimVariables.f90:9
character(len=linelength) idm_context
This class is used to store a single deferred-length character string. It was designed to work in an ...
Definition: CharString.f90:23
A public type for profiling performance in the application. The ProfilerType is used to measure and r...
Definition: Profiler.f90:31
A derived type representing a stack of integers.
Definition: STLStackInt.f90:14