PyForTool
Python-fortran-tool
Loading...
Searching...
No Matches
cosmetics.py
1"""
2Code formatting and style transformations.
3
4Provides the Cosmetics class for modifying FORTRAN code appearance including
5case, indentation, spacing, comments, and line continuation.
6
7Key Features
8-----------
9- Case conversion (upper/lower FORTRAN keywords)
10- Indentation fixing with directive exclusion
11- Comment removal with directive preservation
12- Whitespace normalization
13- Line continuation alignment
14- USE statement ordering and grouping
15
16Classes
17-------
18Cosmetics : Mixin class providing code formatting methods
19
20Examples
21--------
22>>> pft = PYFT('input.F90')
23>>> pft.upperCase() # Convert keywords to UPPER CASE
24>>> pft.lowerCase() # Convert keywords to lower case
25>>> pft.indent() # Fix indentation
26>>> pft.removeComments() # Remove comments (keep directives)
27>>> pft.prettify() # Full formatting (indent + case + spaces)
28"""
29
30import re
31from pyfortool.util import debugDecor, nonCode, tag
32from pyfortool.expressions import createElem
33from pyfortool import NAMESPACE
34
35
36class Cosmetics():
37 """
38 Code formatting and style transformation methods.
39
40 Provides utilities for modifying code appearance including case,
41 indentation, spacing, and comment handling.
42 """
43
44 @debugDecor
45 def upperCase(self):
46 """
47 Convert all FORTRAN keywords to uppercase.
48
49 Examples
50 --------
51 >>> pft = PYFT('mixed.F90')
52 >>> pft.upperCase()
53 # WRITE -> write becomes WRITE
54 """
55 for elem in self.iter():
56 if (not nonCode(elem)) and elem is not None and elem.text is not None:
57 elem.text = elem.text.upper()
58
59 @debugDecor
60 def lowerCase(self):
61 """
62 Convert all FORTRAN keywords to lowercase.
63
64 Examples
65 --------
66 >>> pft = PYFT('MIXED.F90')
67 >>> pft.lowerCase()
68 # WRITE -> write
69 """
70 for elem in self.iter():
71 if (not nonCode(elem)) and elem is not None and elem.text is not None:
72 elem.text = elem.text.lower()
73
    @debugDecor
    def indent(self, nodeToUpdate=None, indentProgramunit=0, indentBranch=2,
               exclDirectives=None):
        """
        Fix code indentation.

        The indentation of a line is stored in the tail of the node that precedes
        it: the method rewrites the spaces found after each newline character of
        these tails.

        Parameters
        ----------
        nodeToUpdate : Element, optional
            Specific node to indent. If None, self is indented.
        indentProgramunit : int, optional
            Number of spaces for program unit contents. Default is 0.
        indentBranch : int, optional
            Number of spaces for nested constructs (do, if, etc.). Default is 2.
        exclDirectives : list, optional
            Comment lines to exclude from indentation (a comment is excluded when
            its text starts with one of the listed strings):
            - None: Exclude '!$OMP' lines (default).
            - []: Include all comment lines.
            - ['!$acc', '!$mnh']: Custom directive list.
            Lines held by a 'cpp' node are never re-indented.

        Returns
        -------
        Element
            The indented node: nodeToUpdate, or self when nodeToUpdate is None.

        Examples
        --------
        >>> pft = PYFT('input.F90')
        >>> pft.indent()  # Default indentation
        >>> pft.indent(indentProgramunit=0, indentBranch=4)  # Custom
        """

        if nodeToUpdate is None:
            nodeToUpdate = self

        if exclDirectives is None:
            exclDirectives = ['!$OMP']

        def setLevel(elem, level, nextElem):
            """
            Set the indentation of the line(s) starting in the tail of elem

            :param elem: element whose tail must be modified
            :param level: level of indentation (number of spaces)
            :param nextElem: next element (None if elem is the last one); the tail is
                             not re-indented when nextElem is a cpp node or
                             a comment starting with one of the exclDirectives strings
            """
            if elem.tail is not None:
                # Tabs are replaced before counting/replacing spaces
                elem.tail = elem.tail.replace('\t', ' ')
                # 'and' binds tighter than 'or': excluded if cpp, or if (comment and directive)
                excl = (nextElem is not None and
                        (tag(nextElem) == 'cpp' or
                         tag(nextElem) == 'C' and
                         any(nextElem.text.startswith(d) for d in exclDirectives)))
                if not excl:
                    # Each newline and the spaces following it become newline + level spaces
                    elem.tail = re.sub('\n[ ]*', '\n' + ' ' * level, elem.tail)

        def indentRecur(elem, level, inConstruct):
            """
            Indent the children of elem, recursively

            :param elem: dom element
            :param level: current level for elem
            :param inConstruct: name of the construct we are in (tag of the construct without
                                its '-construct' suffix), empty string if we are not directly
                                inside a construct
            """
            # Tags of the containers we must enter
            blocs = ['file', 'program-unit', 'if-block', 'where-block', 'selectcase-block']
            # Statements opening a program unit (and the matching end statements)
            progstmt = ['subroutine-stmt', 'program-stmt', 'module-stmt', 'function-stmt',
                        'submodule-stmt', 'procedure-stmt', 'interface-stmt']
            endprogstmt = ['end-' + s for s in progstmt]
            # Statements opening a branch (and the statements closing a branch)
            interbranchstmt = ['else-stmt', 'else-if-stmt', 'else-where-stmt']
            branchstmt = ['if-then-stmt', 'where-construct-stmt'] + interbranchstmt
            endbranchstmt = ['end-if-stmt', 'end-where-stmt']

            currlevel = level
            laste = None  # previous sibling of the node being processed
            firstnumselect = True  # True until the first block of a select case is processed
            for ie, sElem in enumerate(elem):
                # Indentation does not apply to these lines (eg SUBROUTINE statement, DO construct)
                # but apply to the lines inside
                if tag(sElem) in progstmt:
                    currlevel += indentProgramunit
                elif tag(sElem) in branchstmt + [inConstruct + '-stmt']:
                    currlevel += indentBranch

                # Add indentation *to the tail*, thus for the next line
                setLevel(sElem, currlevel, elem[ie + 1] if ie + 1 < len(elem) else None)

                if tag(elem) == 'selectcase-construct':
                    # Structure is (the number is the indentation stored in the tail):
                    # <selectcase-construct>
                    #   <selectcase-block><select-case-stmt>SELECT
                    #                       CASE (...)</select-case-stmt>   \n +2
                    #   </selectcase-block>
                    #   <selectcase-block><case-stmt>CASE<case-selector>(...)
                    #                       </case-selector></case-stmt>    \n +4
                    #                     statement                         \n +4
                    #                     statement                         \n +2
                    #   </selectcase-block>
                    #   <selectcase-block><case-stmt>CASE<case-selector>(...)
                    #                       </case-selector></case-stmt>    \n +4
                    #                     statement                         \n +4
                    #                     statement                         \n +0
                    #                     <end-select-case-stmt>END SELECT</end-select-case-stmt>
                    #   </selectcase-block></selectcase-construct>
                    if firstnumselect:
                        firstnumselect = False
                    else:
                        # previous line was a CASE line, we must indent it only once
                        # pylint: disable-next=unsubscriptable-object
                        setLevel(laste[-1], level + indentBranch, sElem)
                    # statements are indented twice
                    indentRecur(sElem, level + indentBranch * 2, "")
                    if tag(sElem[-1]) == 'end-select-case-stmt':
                        # The END SELECT line is aligned with the SELECT CASE line
                        setLevel(sElem[-2], level, sElem[-1])

                elif tag(sElem) in blocs or tag(sElem).endswith('-construct'):
                    # This xml tag contains other tags, we iterate on them
                    if tag(sElem[0]) in interbranchstmt:
                        # Structure is <if-construct><if-block><if-then-stmt>IF...
                        #                                       ...THEN</if-then-stmt>
                        #                statement (the indentation of the ELSE line is
                        #                           in the tail of this statement)
                        #              </if-block><if-block><else-stmt>ELSE</else-stmt>
                        #                statement
                        #              <end-if-stmt>ENDIF</end-if-stmt></if-block></if-construct>
                        # pylint: disable-next=unsubscriptable-object
                        setLevel(laste[-1], level, sElem)
                    # '-construct' is 10 characters long: 'do-construct' gives 'do'
                    construct = tag(sElem)[:-10] if tag(sElem).endswith('-construct') else ""
                    indentRecur(sElem, currlevel, construct)

                # This line contains the end statement, we must remove the indentation contained
                # in the tail of the previous item
                if tag(sElem) in endprogstmt + endbranchstmt + ['end-' + inConstruct + '-stmt']:
                    setLevel(laste, level, sElem)
                laste = sElem

        indentRecur(nodeToUpdate, 0, "")
        return nodeToUpdate
206
207 @debugDecor
209 """
210 Remove empty lines
211 """
212 elem = self.find('{*}file')
213 if elem is not None and elem.text is not None:
214 elem.text = elem.text.replace('\n', '')
215 for elem in self.iter():
216 if elem.tail is not None and '\n' in elem.tail:
217 elem.tail = elem.tail.replace('\t', ' ')
218 elem.tail = re.sub(r"\n[  \n]*\n", r"\n", elem.tail)
219
220 @debugDecor
221 def removeComments(self, exclDirectives=None, pattern=None):
222 """
223 Remove comments from the source code.
224
225 Parameters
226 ----------
227 exclDirectives : list, optional
228 Comments to preserve (directives to exclude):
229 - None: Preserve '!$OMP', '!$mnh', '!$ACC' lines (default).
230 - []: Remove all comments.
231 - ['!$acc']: Preserve only specific directives.
232 pattern : str or re.Pattern, optional
233 Only remove comments matching this pattern.
234
235 Examples
236 --------
237 >>> pft = PYFT('input.F90')
238 >>> pft.removeComments() # Keep OpenMP/directives
239 >>> pft.removeComments(exclDirectives=[]) # Remove all
240 >>> pft.removeComments(pattern=re.compile(r'!.*TODO'))
241 """
242 if exclDirectives is None:
243 exclDirectives = ['!$OMP', '!$mnh', '!$ACC', '!$acc']
244
245 if isinstance(pattern, str):
246 pattern = re.compile(pattern)
247
248 def recur(elem):
249 tailUpper = None
250 for ie in range(len(elem))[::-1]: # Loop from the end to the begining
251 sElem = elem[ie]
252 if tag(sElem) == 'C' and \
253 not any(sElem.text.startswith(d) for d in exclDirectives) and \
254 (pattern is None or pattern.match(sElem.text)):
255 # Don't loose the tail (containing new line character and indentation)
256 if ie != 0:
257 # It exists an element before,
258 # we add the current tail to this previsous element
259 if elem[ie - 1].tail is None:
260 elem[ie - 1].tail = sElem.tail
261 elif sElem.tail is not None:
262 elem[ie - 1].tail += sElem.tail
263 else:
264 # The's no previsous element, tail is givent back the container element
265 tailUpper = sElem.tail
266 elem.remove(sElem)
267 if len(sElem) >= 1:
268 tail = recur(sElem) # recursive call to inner elements
269 if tail is not None:
270 # The first element was a comment,
271 # its tail must be added to the text attribute
272 if sElem.text is None:
273 sElem.text = tail
274 else:
275 sElem.text += tail
276 return tailUpper
277 recur(self)
278
279 @debugDecor
280 def updateContinuation(self, nodeToUpdate=None, align=True,
281 removeALL=False, addBegin=True, removeBegin=False):
282 """
283 :param nodeToUpdate: if None, the entire xml is updated
284 :param align: True to align begin of continued lines
285 :param removeALL: True to suppress all the continuation line characters ('&')
286 :param addBegin: True to add missing continuation line characters ('&')
287 at the begining of lines
288 :param removeBegin: True to suppress continuation line characters ('&')
289 at the begining of lines
290
291 When suppressed, the '&' are replaced by a space character
292 Comments after a '&' are lost
293 """
294
295 assert not (align and removeALL), "We cannot remove and align at the same time"
296 assert not (addBegin and (removeALL or removeBegin)), \
297 "We cannot remove and add, at the same time, continuation characters"
298
299 if nodeToUpdate is None:
300 nodeToUpdate = self
301
302 parents = {} # cache to be used in recurDirect
303
304 def recurReverse(elem, tail):
305 for ie in range(len(elem))[::-1]: # Loop from the end to the begining
306 sElem = elem[ie]
307 parents[sElem] = elem
308 if tag(sElem) == 'cnt':
309 # Search for comments or cpp after the cnt node
310 commentsAfter = []
311 j = ie + 1
312 while j < len(elem) and tag(elem[j]) in ('C', 'cpp'):
313 commentsAfter.append(elem[j])
314 j += 1
315 nextNode = elem[j] if j < len(elem) else None
316
317 # Is it a '&' at the end of a line (or at the begining)?
318 isend = ((sElem.tail is not None and '\n' in sElem.tail) or
319 len(commentsAfter) > 0)
320
321 # Add missing continuation character at the begining of line
322 if isend and addBegin:
323 if sElem.tail is not None and \
324 sElem.tail.replace('\n', '').replace('\t', '').lstrip(' ') != '':
325 # tail contains text, probably an endding ')', after a carriage return
326 # Thus, there is no '&' to begin line
327 new = createElem('cnt', text='&')
328 # '&' must be put before any text on the following line containing code
329 i = 0
330 while sElem.tail[i] in (' ', '\n', '\t'):
331 i += 1
332 new.tail = ' ' + sElem.tail[i:]
333 sElem.tail = sElem.tail[:i]
334 elem.insert(ie + 1, new)
335 elif tag(nextNode) != 'cnt':
336 # There is no '&' to begin next line
337 new = createElem('cnt', text='&')
338 if len(commentsAfter) > 0:
339 # '&' must be put before any text on the following
340 # line containing code
341 i = 0
342 while i < len(commentsAfter[-1].tail) and \
343 commentsAfter[-1].tail[i] in (' ', '\n', '\t'):
344 i += 1
345 new.tail = ' ' + commentsAfter[-1].tail[i:]
346 commentsAfter[-1].tail = commentsAfter[-1].tail[:i]
347 else:
348 new.tail = ' '
349 elem.insert(ie + 1 + len(commentsAfter), new)
350
351 # Suppression
352 if removeALL or (removeBegin and not isend):
353 cpp = False
354 for com in commentsAfter[::-1]:
355 if tag(com) != 'cpp':
356 elem.remove(com)
357 else:
358 cpp = True
359 if not cpp:
360 # We cannot remove a continuation line followed by a cpp
361 elem.remove(sElem) # OK because we loop in reverse order
362 if sElem.tail is not None:
363 txt = sElem.tail.strip() + ' '
364 else:
365 txt = ' '
366 if ie != 0:
367 if elem[ie - 1].tail is None:
368 elem[ie - 1].tail = txt
369 else:
370 elem[ie - 1].tail += txt
371
372 # Recursively enter blocs
373 if len(sElem) >= 1:
374 recurReverse(sElem, tail)
375
376 def recurDirect(elem, ct, inCnt):
377 """
378 :param ct: current text
379 :param inCnt: -1 if we are not in a statement spanning several lines
380 elswhere contains the number of spaces to add
381 """
382 ignoreComment = False
383 if align:
384 for ie, sElem in enumerate(list(elem)):
385 # It is a '&' character marking the end of the line
386 isendcnt = tag(sElem) == 'cnt' and \
387 ((sElem.tail is not None and '\n' in sElem.tail) or
388 (ie + 1 < len(elem) and tag(elem[ie + 1]) == 'C'))
389 ignoreComment = (ignoreComment or
390 (isendcnt and
391 (ie + 1 < len(elem) and tag(elem[ie + 1]) == 'C')))
392
393 # REAL :: X1, & !comment 1
394 # !comment 2
395 # X2, &
396 # #ifdef XXX
397 # X3, &
398 # #endif
399 # X4
400 if isendcnt or ignoreComment or (inCnt != -1 and tag(sElem) == 'cpp'):
401 # Number of spaces for alignment not already determined
402 # (first line of the continuation)
403 if isendcnt and inCnt == -1:
404 # Search for the container statement
405 topstmt = elem
406 while not tag(topstmt).endswith('-stmt'):
407 topstmt = parents[topstmt]
408
409 # Character to align on
410 if tag(topstmt) == 'a-stmt':
411 patList = ('=>', '=', r'\‍(')
412 elif tag(topstmt) == 'call-stmt':
413 patList = (r'\‍(', r'call[ ]+\w', 'call ', 'call')
414 elif tag(topstmt) == 'if-stmt':
415 patList = (r'\‍(', r'\‍)', 'if ', 'if')
416 elif tag(topstmt) == 'where-stmt':
417 patList = (r'\‍(', r'\‍)', 'where ', 'where')
418 elif tag(topstmt) == 'forall-stmt':
419 patList = (r'\‍(', r'\‍)', 'forall ', 'forall')
420 elif tag(topstmt) == 'namelist-stmt':
421 patList = ('/.*/', '/', 'namelist')
422 elif tag(topstmt) == 'subroutine-stmt':
423 patList = (r'\‍(', r'subroutine[ ]+\w', 'subroutine ', 'subroutine')
424 elif tag(topstmt) == 'use-stmt':
425 patList = (':', r'use[ ]+\w', 'use ', 'use')
426 elif tag(topstmt) == 'T-decl-stmt':
427 patList = ('::', r'\w,', r'\w ', r'\w')
428 elif tag(topstmt) == 'print-stmt':
429 patList = ('print', )
430 elif tag(topstmt) == 'write-stmt':
431 patList = (r'\‍)', r'write[ ]*\‍(', 'write[ ]*', 'write')
432 elif tag(topstmt) == 'procedure-stmt':
433 patList = ('module[ ]+procedure[ ]*', 'module[ ]*', 'module')
434 else:
435 patList = ('::', ':', r'\‍(', '=>', '=', '[', ':', '/')
436
437 # Compute indentation value
438 inCnt = None
439 for pat in patList:
440 if inCnt is None:
441 mat = re.search(pat, ct, flags=re.IGNORECASE)
442 if mat is not None:
443 if ie + 1 < len(elem) and tag(elem[ie + 1]) != 'cnt':
444 # If there is no continuation character at the begining,
445 # align the text with the position after the delimiter
446 # found
447 inCnt = mat.end()
448 else:
449 inCnt = mat.end() - 1
450 if inCnt is None:
451 inCnt = 4
452
453 # Align the next line exept if it is a cpp line
454 if not (ie + 1 < len(elem) and tag(elem[ie + 1]) == 'cpp'):
455 if sElem.tail is not None:
456 sElem.tail = re.sub('\n[ ]*', '\n' + ' ' * inCnt, sElem.tail)
457 else:
458 sElem.tail = '\n' + ' ' * inCnt
459
460 if tag(sElem) not in ('C', 'cnt'):
461 ct += (sElem.text if sElem.text is not None else '')
462 ignoreComment = False
463
464 # Recursively enter the inner blocks
465 if len(sElem) >= 1:
466 ct, inCnt = recurDirect(sElem, ct, inCnt)
467
468 # Text after the end of block
469 ct += (sElem.tail if sElem.tail is not None else '')
470 if '\n' in ct:
471 ct = ct.split('\n')[-1]
472 if tag(sElem) not in ('cnt', 'C', 'cpp'):
473 inCnt = -1
474
475 return ct, inCnt
476
477 recurReverse(nodeToUpdate, 0)
478 recurDirect(nodeToUpdate, "", -1)
479 return nodeToUpdate
480
    # Sentinel used as default value of the adjacentKeywords and afterKeywords
    # arguments of updateSpaces: it makes it possible to tell "argument not given"
    # from an explicit None
    __NO_VALUE__ = '__NO_VALUE__'
482
483 @debugDecor
484 def updateSpaces(self, beforeOp=1, afterOp=1, inOperator=True,
485 beforeComma=0, afterComma=1,
486 beforeParenthesis=0, afterParenthesis=0,
487 beforeAffectation=1, afterAffectation=1, inAffectation=True,
488 beforeRangeDelim=0, afterRangeDelim=0,
489 beforeUseDelim=0, afterUseDelim=1,
490 beforeDeclDelim=1, afterDeclDelim=1,
491 inDeclDelim=True, afterTypeDecl=1,
492 beforeEqDo=0, afterEqDo=0,
493 beforeEqCall=0, afterEqCall=0,
494 beforeEqInit=0, afterEqInit=0,
495 beforeEndcnt=1, afterBegincnt=1,
496 afterIfwherecase=1, beforeThen=1, beforeIfaction=1,
497 afterProgunit=1,
498 endOfLine=True, afterName=0, inName=True,
499 beforeCmdsep=0, afterCmdsep=1,
500 adjacentKeywords=__NO_VALUE__, afterKeywords=__NO_VALUE__):
501 """
502 :param beforeOp, afterOp: number of spaces before and after operators
503 :param inOperator: True to suppress spaces in operators
504 :param beforeComma, afterComma: number of spaces before and after commas
505 :param beforeParenthesis, afterParenthesis: number of spaces before and after parenthesis
506 :param beforeAffectation, afterAffectation: number of spaces before and after
507 affectations or associations
508 :param inAffectation: True to suppress spaces in affectations and in association ('= >')
509 :param beforeRangeDelim, afterRangeDelim: number of spaces before and after range delimiters
510 :param beforeUseDelim, afterUseDelim: number of spaces before and after use delimiters (':')
511 :param beforeDeclDelim, afterDeclDelim: number of spaces before and after declaration and
512 enumerator delimiter ('::')
513 :param inDeclDelim: True to suppress spaces in declaration and enumerator delimiter (': :')
514 :param afterTypeDecl: number of spaces after the type in a declaration w/o '::'
515 (e.g. 'INTEGER I'); also for enumerators (minimum 1)
516 :param beforeEqDo, afterEqDo: number of spaces before and after '=' sign in DO and
517 FORALL statements
518 :param beforeEqCall, afterEqCall: number of spaces before and after '=' sign
519 in CALL statement
520 :param beforeEqInit, afterEqInit: number of spaces before and after '=' sign for init values
521 :param beforeEndcnt, afterBegincnt: number of spaces before a continuation chararcter at the
522 end of the line and after a continuation character
523 at the begining of a line
524 :param afterIfwherecase: number of spaces after the IF, ELSEIF, WHERE, ELSEWHERE,
525 SELECTCASE, CASE and FORALL keywords
526 :param beforeThen: number of spaces before the THEN keyword
527 :param beforeIfaction: number of spaces
528 between IF condition and action in one-line IF statement and
529 between FORALL specification and affectation in one-line FORALL
530 statement and
531 between WHERE mask and action in one-line WHERE statement
532 :param afterProgunit: between the program unit type (e.g. SUBROUTINE) and its name
533 :param endOfLine: True to suppress spaces at the end of the line
534 :param afterName: number of spaces after an indentifier, type or attribute name
535 :param inName: True to suppress spaces in identifier names
536 :param beforeCmdsep, afterCmdsep: number of spaces before and after command separator (';')
537 :param adjacentKeywords: describes the number of spaces to introduce between adjancent
538 keywords when this is legal (the list comes from the table
539 "6.2 Adjacent keywords where separating blanks are optional" of the
540 F2008 norm and has been complemented by "end select",
541 "implicit none" and "module procedure"; for the last two,
542 a minimum of 1 is required).
543 The allowed dictionnary keys are:
544 - block_data
545 - double_precision
546 - else_if
547 - else_where
548 - end_associate
549 - end_block
550 - end_block_data
551 - end_critical
552 - end_do
553 - end_enum
554 - end_file
555 - end_forall
556 - end_function
557 - end_if
558 - end_interface
559 - end_module
560 - end_procedure
561 - end_program
562 - end_selec
563 - end_select
564 - end_submodule
565 - end_subroutine
566 - end_team
567 - end_type
568 - end_where
569 - go_to
570 - in_out
571 - select_case
572 - select_type
573 - implicit_none
574 - module_procedure
575 For example, use {'end_do':1} to write 'END DO' or
576 {'end_do':0} to write 'ENDDO' or
577 {'end_do':None} to not update the writting
578 or use adjacentKeywords=None to disable everything
579 :param afterKeywords: describes the number of spaces to introduce after keywords.
580 Some keywords need a more sophisticated treatment and are controled
581 by specific keys (e.g. CASE).
582 The keys are the keyword in lowercase, some names can be tricky
583 to guess (e.g. the key for ENDFILE is 'end-file'). By default
584 only a few are defined.
585 Use afterKeywords=None to disable everything.
586
587 To not update spaces, put None instead of an integer and False in booleans.
588 For example, to not change number of spaces after a comma, use afterComma=None
589
590 Updates are done in the following order:
591 """
592
593 adjaKeyDesc = {
594 'block_data': (1, './/{*}block-data-stmt'),
595 'double_precision': (1, './/{*}intrinsic-T-spec/{*}T-N'),
596 'else_if': (1, './/{*}else-if-stmt'),
597 'else_where': (0, './/{*}else-where-stmt'),
598 'end_associate': (1, './/{*}end-associate-stmt'),
599 'end_block': (1, './/{*}end-block-stmt'),
600 'end_block_data': (1, './/{*}end-block-data-stmt'),
601 'end_critical': (1, './/{*}end-critical-stmt'),
602 'end_do': (1, './/{*}end-do-stmt'),
603 'end_enum': (1, './/{*}end-enum-stmt'),
604 'end_file': (1, './/{*}end-file-stmt'),
605 'end_forall': (1, './/{*}end-forall-stmt'),
606 'end_function': (1, './/{*}end-function-stmt'),
607 'end_if': (1, './/{*}end-if-stmt'),
608 'end_interface': (1, './/{*}end-interface-stmt'),
609 'end_module': (1, './/{*}end-module-stmt'),
610 'end_procedure': (1, './/{*}end-procedure-stmt'),
611 'end_program': (1, './/{*}end-program-stmt'),
612 'end_selec': (1, './/{*}end-select-case-stmt'),
613 'end_select': (1, './/{*}end-select-T-stmt'),
614 'end_submodule': (1, './/{*}end-submodule-stmt'),
615 'end_subroutine': (1, './/{*}end-subroutine-stmt'),
616 'end_team': (1, './/{*}end-change-team-stmt'),
617 'end_type': (1, './/{*}end-T-stmt'),
618 'end_where': (1, './/{*}end-where-stmt'),
619 'go_to': (0, './/{*}goto-stmt'),
620 'in_out': (0, './/{*}intent-spec'),
621 'select_case': (1, './/{*}select-case-stmt'),
622 'select_type': (1, './/{*}select-T-stmt'),
623 'implicit_none': (1, './/{*}implicit-none-stmt'),
624 'module_procedure': (1, './/{*}procedure-stmt'),
625 }
626
627 afterKey = {
628 'print': 0,
629 'call': 1,
630 'use': 1,
631 'do': 1,
632 'end-file': 1,
633 'save': 1,
634 }
635
636 assert adjacentKeywords is None or adjacentKeywords == self.__NO_VALUE__ or \
637 all(k in adjaKeyDesc
638 for k in adjacentKeywords), "Unknown key in **adjacentKeywords"
639
640 def getvalAdja(key):
641 if adjacentKeywords is None:
642 return None
643 if adjacentKeywords == self.__NO_VALUE__:
644 return adjaKeyDesc[key][0]
645 return adjacentKeywords.get(key, adjaKeyDesc[key][0])
646
647 def getvalAfter(key):
648 key = key[:-5]
649 if afterKeywords != self.__NO_VALUE__:
650 num = afterKeywords.get(key, afterKey.get(key, None))
651 else:
652 num = afterKey.get(key, None)
653 return num
654
655 assert afterProgunit is None or afterProgunit >= 1
656 assert afterTypeDecl is None or afterTypeDecl >= 1
657 for k in ('implicit_none', 'module_procedure'):
658 num = getvalAdja(k)
659 assert num is None or num >= 1, \
660 "adjacentKeywords['" + k + "'] must be at least 1 (is " + str(num) + ")"
661 for k in ('use', 'call', 'end-file', 'do'):
662 num = getvalAfter(k + '-stmt')
663 assert num is None or num >= 1, \
664 "afterKeywords['" + k + "'] must be at least 1 (is " + str(num) + ")"
665
666 for elem in self.iter():
667 isNotC = tag(elem) != 'C'
668 # security
669 if elem.tail is None:
670 elem.tail = ""
671 elem.tail = elem.tail.replace('\t', ' ')
672
673 # Around parenthesis
674 if beforeParenthesis is not None:
675 elem.tail = re.sub(r"[  ]*\‍(", " " * beforeParenthesis + r"(", elem.tail)
676 elem.tail = re.sub(r"[  ]*\‍)", " " * beforeParenthesis + r")", elem.tail)
677 if elem.text is not None and isNotC:
678 elem.text = re.sub(r"[  ]*\‍(", " " * beforeParenthesis + r"(", elem.text)
679 elem.text = re.sub(r"[  ]*\‍)", " " * beforeParenthesis + r")", elem.text)
680 if afterParenthesis is not None:
681 elem.tail = re.sub(r"\‍([  ]*", "(" + " " * afterParenthesis, elem.tail)
682 elem.tail = re.sub(r"\‍)[  ]*", ")" + " " * afterParenthesis, elem.tail)
683 if elem.text is not None and isNotC:
684 elem.text = re.sub(r"\‍([  ]*", "(" + " " * afterParenthesis, elem.text)
685 elem.text = re.sub(r"\‍)[  ]*", ")" + " " * afterParenthesis, elem.text)
686
687 # Around commas
688 if beforeComma is not None:
689 elem.tail = re.sub(r"[  ]*,", " " * beforeComma + r",", elem.tail)
690 if elem.text is not None and isNotC:
691 elem.text = re.sub(r"[  ]*,", " " * beforeComma + r",", elem.text)
692 if afterComma is not None:
693 elem.tail = re.sub(r",[  ]*", "," + " " * afterComma, elem.tail)
694 if elem.text is not None and isNotC:
695 elem.text = re.sub(r",[  ]*", "," + " " * afterComma, elem.text)
696
697 # End of line
698 if endOfLine:
699 elem.tail = re.sub(r"[  ]*\n", r"\n", elem.tail)
700
701 # In names or around names (identifier, type, attribute)
702 if tag(elem) in ('N', 'T-N', 'attribute-N'):
703 if inName:
704 for nnn in elem.findall('{*}n'):
705 if nnn.tail is not None:
706 nnn.tail = nnn.tail.strip(' ')
707 if elem.tail is not None and afterName is not None:
708 elem.tail = ' ' * afterName + elem.tail.lstrip(' ')
709
710 # Around range delimiter
711 elif tag(elem) == 'lower-bound' and elem.tail is not None and ':' in elem.tail:
712 if beforeRangeDelim is not None:
713 elem.tail = ' ' * beforeRangeDelim + elem.tail.lstrip(' ')
714 if afterRangeDelim is not None:
715 elem.tail = elem.tail.rstrip(' ') + ' ' * beforeRangeDelim
716
717 # Around ':' in USE statements
718 elif tag(elem) == 'module-N' and elem.tail is not None and ':' in elem.tail:
719 if beforeUseDelim is not None:
720 elem.tail = re.sub(r"[  ]*:", " " * beforeUseDelim + r":", elem.tail)
721 if afterUseDelim is not None:
722 elem.tail = re.sub(r":[  ]*", ":" + " " * afterUseDelim, elem.tail)
723
724 # Around and in '::' in declaration statements
725 # After the type in a declaration
726 elif tag(elem) in ('attribute', '_T-spec_') and elem.tail is not None:
727 if inDeclDelim:
728 elem.tail = re.sub(r":[  ]*:", r"::", elem.tail)
729 if beforeDeclDelim is not None:
730 elem.tail = re.sub(r"[ ]*(:[  ]*:)", ' ' * beforeDeclDelim + r"\1", elem.tail)
731 if afterDeclDelim is not None:
732 elem.tail = re.sub(r"(:[  ]*:)[ ]*", r"\1" + ' ' * afterDeclDelim, elem.tail)
733 if tag(elem) == '_T-spec_' and afterTypeDecl is not None:
734 elem.tail = elem.tail.rstrip(' ') + ' ' * afterTypeDecl
735
736 # Around and in '::' in enumerators
737 # After the enumerator keyword
738 elif tag(elem) == 'enumerator-stmt' and elem.text is not None:
739 if ':' in elem.text:
740 if inDeclDelim:
741 elem.text = re.sub(r":[  ]*:", r"::", elem.text)
742 if beforeDeclDelim is not None:
743 elem.text = re.sub(r"[ ]*(:[  ]*:)", ' ' * beforeDeclDelim + r"\1",
744 elem.text)
745 if afterDeclDelim is not None:
746 elem.text = re.sub(r"(:[  ]*:)[ ]*", r"\1" + ' ' * afterDeclDelim,
747 elem.text)
748 elif afterTypeDecl is not None:
749 elem.text = elem.text.rstrip(' ') + ' ' * afterTypeDecl
750
751 # Between the program unit type and its name
752 elif (tag(elem) in ('subroutine-stmt', 'program-stmt', 'module-stmt', 'function-stmt',
753 'submodule-stmt', 'procedure-stmt', 'interface-stmt',
754 'end-subroutine-stmt', 'end-program-stmt',
755 'end-module-stmt', 'end-function-stmt',
756 'end-submodule-stmt', 'end-procedure-stmt', 'end-interface-stmt')
757 and afterProgunit is not None):
758 if elem.text is not None:
759 elem.text = elem.text.rstrip(' ') + ' ' * afterProgunit
760
761 # Around '=' sign in DO and FORALL statements
762 elif tag(elem) in ('do-V', 'V') and elem.tail is not None and '=' in elem.tail:
763 if beforeEqDo is not None:
764 elem.tail = re.sub('[ ]*=', ' ' * beforeEqDo + '=', elem.tail)
765 if afterEqDo is not None:
766 elem.tail = re.sub('=[ ]*', '=' + ' ' * beforeEqDo, elem.tail)
767
768 # Around '=' sign in CALL statements
769 elif tag(elem) == 'arg-N' and elem.tail is not None and '=' in elem.tail:
770 if beforeEqCall is not None:
771 elem.tail = re.sub('[ ]*=', ' ' * beforeEqCall + '=', elem.tail)
772 if afterEqCall is not None:
773 elem.tail = re.sub('=[ ]*', '=' + ' ' * beforeEqCall, elem.tail)
774
775 # Around '=' sign for init values
776 elif (tag(elem) in ('EN-N', 'named-constant') and
777 elem.tail is not None and '=' in elem.tail):
778 if beforeEqInit is not None:
779 elem.tail = re.sub('[ ]*=', ' ' * beforeEqInit + '=', elem.tail)
780 if afterEqInit is not None:
781 elem.tail = re.sub('=[ ]*', '=' + ' ' * beforeEqInit, elem.tail)
782 # Around the command separator ';'
783 elif tag(elem) == 'smc':
784 if beforeCmdsep is not None:
785 prev = self.getSiblings(elem, after=False)
786 if len(prev) != 0 and prev[-1].tail is not None:
787 prev[-1].tail = ' ' * beforeCmdsep + prev[-1].tail.lstrip(' ')
788 if afterCmdsep is not None and elem.tail is not None:
789 elem.tail = elem.tail.rstrip(' ') + ' ' * afterCmdsep
790
791 # Around and in association operators (affectation case done after)
792 elif tag(elem) == 'associate-N' and elem.tail is not None and '=' in elem.tail:
793 if beforeAffectation is not None:
794 elem.tail = re.sub('[ ]*=', ' ' * beforeAffectation + '=', elem.tail)
795 if afterAffectation is not None:
796 elem.tail = re.sub('>[ ]*', '>' + ' ' * beforeAffectation, elem.tail)
797 if inAffectation:
798 elem.tail = re.sub(r'=[ ]*>', '=>', elem.tail)
799
800 # After a reserved keyword
801 # elif afterKeywords is not None and tag(elem).endswith('-stmt'):
802 # num = getvalAfter(tag(elem))
803 # if num is not None and elem.text is not None:
804 # elem.text = elem.text.rstrip(' ') + ' ' * num
805
806 # Another loop on elements
807 # All the transformations are not put in a single loop because the following one act
808 # on sub-elements. Putting them all in the same loop would prevent to control in which order
809 # the different transformations occur.
810 # For instance, the suppression on the space after the parenthesis must be done before
811 # the adding of a space before a THEN keyword
812 for elem in self.iter():
813 # Around and in operators
814 if tag(elem) == 'op-E': # op are always (?) in op-E nodes
815 for op in elem.findall('{*}op'):
816 if beforeOp is not None:
817 io = list(elem).index(op)
818 if io != 0:
819 prev = elem[io - 1]
820 if prev.tail is None:
821 prev.tail = ' ' * beforeOp
822 else:
823 prev.tail = prev.tail.rstrip(' ') + ' ' * beforeOp
824 if afterOp is not None:
825 if op.tail is None:
826 op.tail = ' ' * afterOp
827 else:
828 op.tail = op.tail.lstrip(' ') + ' ' * afterOp
829 if inOperator:
830 for oo in op.findall('{*}o'):
831 if oo.tail is not None:
832 oo.tail = oo.tail.strip(' ')
833
834 # Around and in affectation operators (association case done before)
835 elif tag(elem) in ('a-stmt', 'pointer-a-stmt'):
836 # a are always (?) in a-stmt or pointer-a-stmt nodes
837 for aff in elem.findall('{*}a'):
838 if beforeAffectation is not None:
839 prev = elem[list(elem).index(aff) - 1]
840 if prev.tail is None:
841 prev.tail = ' ' * beforeAffectation
842 else:
843 prev.tail = prev.tail.rstrip(' ') + ' ' * beforeAffectation
844 if afterAffectation is not None:
845 if aff.tail is None:
846 aff.tail = ' ' * afterAffectation
847 else:
848 aff.tail = aff.tail.lstrip(' ') + ' ' * afterAffectation
849 if inAffectation:
850 aff.text = aff.text.replace(' ', '')
851
852 # After a IF, WHERE, ELSEIF, ELSEWHERE, SELECTCASE, CASE and FORALL keyword,
853 # and before THEN keyword
854 elif tag(elem) in ('if-stmt', 'if-then-stmt', 'else-if-stmt',
855 'where-stmt', 'where-construct-stmt', 'else-where-stmt',
856 'select-case-stmt', 'case-stmt',
857 'forall-stmt', 'forall-construct-stmt'):
858 if afterIfwherecase is not None and elem.text is not None:
859 if tag(elem) == 'case-stmt':
860 # the (eventual) parenthesis is not in the text of the node
861 elem.text = elem.text.rstrip(' ') + ' ' * afterIfwherecase
862 else:
863 elem.text = re.sub(r'[ ]*\‍(', ' ' * afterIfwherecase + '(',
864 elem.text, count=1)
865 if tag(elem) in ('if-then-stmt', 'else-if-stmt') and beforeThen is not None:
866 cond = elem.find('{*}condition-E')
867 cond.tail = re.sub(r'\‍)[ ]*([a-zA-Z]*$)', ')' + ' ' * beforeThen + r'\1',
868 cond.tail)
869 elif tag(elem) == 'if-stmt' and beforeIfaction is not None:
870 cond = elem.find('{*}condition-E')
871 cond.tail = re.sub(r'\‍)[ ]*$', ')' + ' ' * beforeIfaction, cond.tail)
872 elif tag(elem) == 'where-stmt' and beforeIfaction is not None:
873 cond = elem.find('{*}mask-E')
874 cond.tail = re.sub(r'\‍)[ ]*$', ')' + ' ' * beforeIfaction, cond.tail)
875 elif tag(elem) == 'forall-stmt' and beforeIfaction is not None:
876 sub = elem.find('{*}forall-triplet-spec-LT')
877 sub.tail = re.sub(r'\‍)[ ]*$', ')' + ' ' * beforeIfaction, sub.tail)
878
879 # Direct search to prevent using the costly getParent function
880 if beforeEndcnt is not None or afterBegincnt is not None:
881 for elem in self.findall('.//{*}cnt/..'): # node containing continuation characters
882 for cnt in elem.findall('{*}cnt'): # continuation characters
883 ic = list(elem).index(cnt)
884 if ic == 0:
885 # the string before the continuation character is in the parent text
886 prev = elem
887 pstring = prev.text
888 else:
889 # the string before the continuation character is in previsous sibling tail
890 prev = elem[ic - 1]
891 pstring = prev.tail
892 if '\n' in pstring and '\n' in cnt.tail:
893 # continuation character alone on a line
894 pass
895 elif '\n' in pstring and afterBegincnt is not None:
896 # continuation character at the begining of a line
897 cnt.tail = ' ' * afterBegincnt + cnt.tail.lstrip(' ')
898 elif beforeEndcnt is not None:
899 # continuation character at the end of a line
900 # (eventually followed by a comment)
901 if prev == elem:
902 prev.text = prev.text.rstrip(' ') + ' ' * beforeEndcnt
903 else:
904 prev.tail = prev.tail.rstrip(' ') + ' ' * beforeEndcnt
905
906 # In adjacent keywords
907 for key, val in adjaKeyDesc.items():
908 num = getvalAdja(key)
909 if num is not None:
910 for node in self.findall(val[1]):
911 lf = "[ ]*".join(["(" + p + ")" for p in key.split('_')])
912 repl = (" " * num).join([r"\{i}".format(i=i + 1)
913 for i, _ in enumerate(key.split('_'))])
914 node.text = re.sub(lf, repl, node.text, flags=re.IGNORECASE)
915
@debugDecor
def changeIfStatementsInIfConstructs(self, singleItem=None):
    """
    Rewrite one-line IF statements as IF-THEN / END IF constructs.

    Parameters
    ----------
    singleItem : Element, optional
        The if-stmt node to rewrite. When None, every if-stmt node returned
        by self.findall('.//{*}if-stmt') is processed.

    Notes
    -----
    An if-stmt which contains a cycle-stmt node is left untouched.

    Before:
        IF(A==B) print*, "C"

    After:
        IF(A==B) THEN
          print*, "C"
        END IF

    Examples
    --------
    >>> pft = PYFT('input.F90')
    >>> pft.changeIfStatementsInIfConstructs()
    """
    candidates = [singleItem] if singleItem is not None else self.findall('.//{*}if-stmt')
    for ifNode in candidates:
        # IF statements guarding a CYCLE are not transformed
        if ifNode.findall('.//{*}cycle-stmt'):
            continue

        # The indentation is read from the trailing spaces of the previous sibling tail;
        # without previous sibling (or without tail) the indentation is taken as zero
        parent = self.getParent(ifNode)
        position = list(parent).index(ifNode)
        previousTail = parent[position - 1].tail if position != 0 else None
        if previousTail is None:
            indentation = 0
        else:
            indentation = len(previousTail) - len(previousTail.rstrip(' '))

        # Target structure:
        # <if-construct><if-block>
        #   <if-then-stmt>IF(<condition-E>...</condition-E>) THEN</if-then-stmt>
        #   ...action...
        #   <end-if-stmt>END IF</end-if-stmt>
        # </if-block></if-construct>

        # Step 1: the if-stmt node becomes the if-construct node, new nodes are created
        ifNode.tag = f'{{{NAMESPACE}}}if-construct'
        block = createElem('if-block')
        thenStmt = createElem('if-then-stmt')
        endStmt = createElem('end-if-stmt')
        block.append(thenStmt)
        ifNode.append(block)

        # Step 2: the 'IF(' text goes to the new if-then-stmt node,
        #         whose tail carries the indentation of the action statement
        thenStmt.text = ifNode.text
        thenStmt.tail = '\n' + ' ' * (indentation + 2)
        ifNode.text = None

        # Step 3: the condition is moved and followed by the THEN keyword
        cond = ifNode.find('{*}condition-E')
        separator = '' if cond.tail.endswith(' ') else ' '
        cond.tail = cond.tail + separator + 'THEN'
        thenStmt.append(cond)
        ifNode.remove(cond)

        # Step 4: the statement held by the action-stmt node is moved into the block,
        #         its tail carries the indentation of the END IF
        actionNode = ifNode.find('{*}action-stmt')
        innerStmt = actionNode[0]
        innerStmt.tail = '\n' + ' ' * indentation
        block.append(innerStmt)
        ifNode.remove(actionNode)

        # Step 5: the block is closed
        endStmt.text = 'END IF'
        block.append(endStmt)

        # Step 6: continuation characters directly under the former if-stmt node are
        #         dropped (a '\n' now follows THEN)
        for continuation in ifNode.findall('./{*}cnt'):
            ifNode.remove(continuation)
995
@debugDecor
def removeEmptyCONTAINS(self):
    """
    Remove the CONTAINS statement if this section is empty.

    A section is considered empty when, once the comment nodes following the
    contains-stmt node are skipped, the next sibling is an end-subroutine-stmt,
    an end-function-stmt or an end-module-stmt node.
    """
    # NOTE(review): the 'def' line was absent from the reviewed listing; the method
    # name removeEmptyCONTAINS is assumed - confirm against the callers.
    endTags = ('end-subroutine-stmt', 'end-function-stmt', 'end-module-stmt')
    for contains in self.findall('.//{*}contains-stmt'):
        par = self.getParent(contains)
        siblings = list(par)
        nextStmt = siblings.index(contains) + 1
        # Skip the comments following the CONTAINS statement; the bound check prevents
        # an IndexError when only comments (or nothing) follow it
        while nextStmt < len(siblings) and tag(siblings[nextStmt]) == 'C':
            nextStmt += 1
        if nextStmt < len(siblings) and tag(siblings[nextStmt]) in endTags:
            # CONTAINS bloc is empty
            par.remove(contains)
1011
@debugDecor
def formatModuleUse(self, upper=True):
    """
    Order USE declarations by module type and alphabetically.

    For each program-unit node, the use-stmt nodes which are direct children
    are grouped by module name prefix and sorted alphabetically (case
    insensitive) within each group. The group order is: MODD_, MODE_, MODI_,
    MODN_, followed by any other modules sorted alphabetically.
    The comment nodes immediately following a USE statement are moved with it.
    A line containing a single '!' is inserted between two non-empty groups.
    The sorted statements are re-inserted, contiguously, at the position of
    the first USE statement.

    Parameters
    ----------
    upper : bool, optional
        If True, convert module names to uppercase. Default is True.
        Other names of the statement (e.g. in an ONLY list) are left as written.

    Returns
    -------
    self
        Returns self for method chaining.

    Examples
    --------
    >>> pft = PYFT('input.F90')
    >>> pft.formatModuleUse()
    >>> pft.formatModuleUse(upper=False)

    Notes
    -----
    Before:
        USE modi_foo
        USE modd_bar
        USE mode_baz

    After (upper=True):
        USE MODD_BAR
        !
        USE MODE_BAZ
        !
        USE MODI_FOO
    """
    SORTED_GROUPS = ('MODD', 'MODE', 'MODI', 'MODN')

    def _getModuleName(stmt):
        # Name of the module used by the statement ('' if it cannot be found)
        n = stmt.find('.//{*}module-N/{*}N/{*}n')
        return n.text if n is not None and n.text is not None else ''

    def _sortKey(group):
        # Case-insensitive key used both for grouping and for sorting
        return _getModuleName(group[0]).upper()

    def _addSep(ordered):
        # Append a '!' line after the last statement (or its last comment) already ordered
        prevStmt, prevComments = ordered[-1]
        last = prevComments[-1] if prevComments else prevStmt
        last.tail = (last.tail or '').rstrip('\n') + '\n!\n'

    for progUnit in self.findall('.//{*}program-unit'):
        # Collect each USE statement with the comments which directly follow it
        children = list(progUnit)
        useGroups = []
        i = 0
        while i < len(children):
            child = children[i]
            i += 1
            if tag(child) == 'use-stmt':
                comments = []
                while i < len(children) and tag(children[i]) == 'C':
                    comments.append(children[i])
                    i += 1
                useGroups.append([child, comments])

        if not useGroups:
            continue

        # Dispatch the statements among the known prefixes, the remaining ones go in 'other'
        groups = {prefix: [] for prefix in SORTED_GROUPS}
        other = []
        for group in useGroups:
            name = _sortKey(group)
            prefix = next((p for p in SORTED_GROUPS if name.startswith(p + '_')), None)
            (other if prefix is None else groups[prefix]).append(group)

        # Build the final order, a separator is added between two non-empty groups
        ordered = []
        for bucket in [groups[prefix] for prefix in SORTED_GROUPS] + [other]:
            if bucket:
                bucket.sort(key=_sortKey)
                if ordered:
                    _addSep(ordered)
                ordered.extend(bucket)

        # Position of the first USE statement, computed before any removal
        idx = children.index(useGroups[0][0])
        for stmt, comments in useGroups:
            progUnit.remove(stmt)
            for c in comments:
                progUnit.remove(c)
        for stmt, comments in ordered:
            for node in [stmt] + comments:
                progUnit.insert(idx, node)
                idx += 1

        if upper:
            # Only the module name is converted, as documented; the previous test
            # ('n not in modNNs') skipped precisely the module name
            for stmt, _ in ordered:
                modN = stmt.find('.//{*}module-N')
                if modN is None:
                    continue
                for n in modN.findall('.//{*}n'):
                    if n.text:
                        n.text = n.text.upper()

    return self
updateContinuation(self, nodeToUpdate=None, align=True, removeALL=False, addBegin=True, removeBegin=False)
Definition cosmetics.py:281
removeComments(self, exclDirectives=None, pattern=None)
Definition cosmetics.py:221
formatModuleUse(self, upper=True)
updateSpaces(self, beforeOp=1, afterOp=1, inOperator=True, beforeComma=0, afterComma=1, beforeParenthesis=0, afterParenthesis=0, beforeAffectation=1, afterAffectation=1, inAffectation=True, beforeRangeDelim=0, afterRangeDelim=0, beforeUseDelim=0, afterUseDelim=1, beforeDeclDelim=1, afterDeclDelim=1, inDeclDelim=True, afterTypeDecl=1, beforeEqDo=0, afterEqDo=0, beforeEqCall=0, afterEqCall=0, beforeEqInit=0, afterEqInit=0, beforeEndcnt=1, afterBegincnt=1, afterIfwherecase=1, beforeThen=1, beforeIfaction=1, afterProgunit=1, endOfLine=True, afterName=0, inName=True, beforeCmdsep=0, afterCmdsep=1, adjacentKeywords=__NO_VALUE__, afterKeywords=__NO_VALUE__)
Definition cosmetics.py:500
indent(self, nodeToUpdate=None, indentProgramunit=0, indentBranch=2, exclDirectives=None)
Definition cosmetics.py:76
changeIfStatementsInIfConstructs(self, singleItem=None)
Definition cosmetics.py:917