gtool5 Fortran 90/95 ライブラリ 1.0.0-rc5
English
Loading...
Searching...
No Matches
dc_regex.f90
Go to the documentation of this file.
1!-----------------------------------------------------------------------
2! Copyright (c) 2000-2026 Gtool Development Group. All rights reserved.
3!-----------------------------------------------------------------------
63 implicit none
64 private
65 public:: match
66
67 character, save :: C_ESCAPE = '#'
68 integer, parameter :: SYM_EOL = -128
69 integer, parameter :: SYM_ANYCHAR = 500
70 integer, parameter :: SYM_QUESTION = 501
71 integer, parameter :: SYM_PLUS = 502
72 integer, parameter :: SYM_STAR = 503
73 integer, parameter :: SYM_NORMAL_SET = 520
74 integer, parameter :: SYM_REVERSED_SET = 521
75 integer, parameter :: SYM_HEADFIX = 540
76 integer, parameter :: SYM_TAILFIX = 541
77 integer, parameter :: SYM_ISDIGIT = 560
78 integer, parameter :: SYM_ISALPHA = 561
79 integer, parameter :: SYM_ISWORD = 562
80 integer, parameter :: SYM_ISSPACE = 563
81 integer, parameter :: SYM_ISXDIGIT = 564
82 integer, parameter :: SYM_COUNT_BASE = 1000
83
84contains
85
105 subroutine preprocess_pattern(pattern, symbols)
106 character(len = *), intent(in):: pattern
107 integer, intent(out):: symbols(:)
108 integer:: i, j, code, imax, j_last_set
109 integer:: status, stat_return
110 integer, parameter:: &
111 & STAT_INIT = 1, &
112 & STAT_ESCAPE = 2, &
113 & STAT_OPEN_SET = 3, &
114 & STAT_IN_SET = 4, &
115 & STAT_HEXADECIMAL = 5
116 character:: c
117 continue
118 status = stat_init
119 stat_return = stat_init
120 symbols(:) = sym_eol
121 j_last_set = 0
122 imax = len_trim(pattern)
123
124 j = 1
125 do, i = 1, imax
126 c = pattern(i:i)
127 select case(status)
128 case(stat_init)
129 if (c == c_escape) then
130 status = stat_escape
131 cycle
132 else if (c == "[") then
133 symbols(j) = sym_normal_set
134 status = stat_open_set
135 else if (c == ".") then
136 symbols(j) = sym_anychar
137 else if (c == "?") then
138 symbols(j) = sym_question
139 else if (c == "+") then
140 symbols(j) = sym_plus
141 else if (c == "*") then
142 symbols(j) = sym_star
143 else if (c == "^" .and. i == 1) then
144 symbols(j) = sym_headfix
145 else if (c == "$" .and. i == imax) then
146 symbols(j) = sym_tailfix
147 else
148 symbols(j) = ichar(c)
149 end if
150 case(stat_escape)
151 if (c == 'd' .or. c == 'D') then
152 symbols(j) = sym_isdigit
153 else if (c == 'a' .or. c == 'A') then
154 symbols(j) = sym_isalpha
155 else if (c == 'w' .or. c == 'W') then
156 symbols(j) = sym_isword
157 else if (c == 's' .or. c == 'S') then
158 symbols(j) = sym_isspace
159 else if (c == 'z' .or. c == 'Z') then
160 symbols(j) = sym_isxdigit
161 else if (c == 'x' .or. c == 'X') then
162 symbols(j) = -1
163 status = stat_hexadecimal
164 cycle
165 else
166 symbols(j) = ichar(c)
167 end if
168 status = stat_return
169 case(stat_hexadecimal)
170 code = index("123456789ABCDEFabcdef", c)
171 if (code >= 16) code = code - 6
172 if (symbols(j) == -1) then
173 symbols(j) = code
174 cycle
175 else
176 symbols(j) = symbols(j) * 16 + code
177 status = stat_return
178 end if
179 case(stat_open_set)
180 symbols(j) = sym_count_base
181 j_last_set = j
182 stat_return = stat_in_set
183 if (c == '^') then
184 symbols(j - 1) = sym_reversed_set
185 status = stat_in_set
186 else if (c == c_escape) then
187 status = stat_escape
188 else
189 j = j + 1
190 symbols(j) = ichar(c)
191 status = stat_in_set
192 end if
193 case(stat_in_set)
194 if (c == ']') then
195 symbols(j_last_set) = sym_count_base + j - j_last_set - 1
196 stat_return = stat_init
197 status = stat_init
198 cycle
199 else if (c == c_escape) then
200 status = stat_escape
201 cycle
202 else
203 symbols(j) = ichar(c)
204 end if
205 end select
206 j = j + 1
207 end do
208 select case(status)
209 case(stat_escape)
210 symbols(j) = ichar(' ')
211 case(stat_open_set)
212 symbols(j) = sym_count_base
213 case(stat_in_set)
214 symbols(j_last_set) = sym_count_base + j - j_last_set - 1
215 end select
216 end subroutine preprocess_pattern
217
246 recursive subroutine match_here(ipat, text, length)
247 integer, intent(in):: ipat(:)
248 character(len = *), intent(in):: text
249 integer, intent(out):: length
250 integer:: s1, s2, remain, i, hitmax, hitcount, hit_at_least
251 logical:: normal_hit
252 continue
253 ! パターンの終わり。空パターンには何でもマッチ
254 if (size(ipat) == 0 .or. ipat(1) == sym_eol) then
255 length = 0
256 return
257 end if
258 ! パターンの文末固定指示
259 if (ipat(1) == sym_tailfix) then
260 if (text == "") then
261 length = 0
262 else
263 length = -1
264 end if
265 return
266 end if
267 if (len(text) == 0) then
268 length = -1
269 return
270 end if
271 ! 1字指定(範囲または1字リテラル)の抽出 ... ipat(s1:s2)
272 if (ipat(1) == sym_normal_set) then
273 s1 = 3
274 s2 = 2 + ipat(2) - sym_count_base
275 normal_hit = .true.
276 else if (ipat(1) == sym_reversed_set) then
277 s1 = 3
278 s2 = 2 + ipat(2) - sym_count_base
279 normal_hit = .false.
280 else
281 s1 = 1
282 s2 = 1
283 normal_hit = .true.
284 end if
285 ! その次の記号 ipat(s2+1) は量化子か次の1字指定である
286 remain = s2 + 2
287 select case (ipat(s2 + 1))
288 case(sym_star)
289 hitmax = len(text)
290 hit_at_least = 0
291 case(sym_plus)
292 hitmax = len(text)
293 hit_at_least = 1
294 case(sym_question)
295 hitmax = 1
296 hit_at_least = 0
297 case default
298 hitmax = 1
299 hit_at_least = 1
300 remain = s2 + 1
301 end select
302 ! 現位置以降の1字指定のヒット数を数える
303 hitcount = 0
304 do, i = 1, hitmax
305 if (hit(ipat(s1:s2), text(i:i)) .neqv. normal_hit) then
306 exit
307 end if
308 hitcount = i
309 end do
310 ! 現位置で無ヒットの場合、ヒットを要するならマッチ失敗
311 if (hitcount < hit_at_least) then
312 length = -1
313 return
314 end if
315 ! 最長原理: なるべく長くヒットしたものから、残りのマッチする
316 ! ものを探す。いわゆる最左最長探索の最長である。
317 do, i = 1 + hitcount, 1 + hit_at_least, -1
318 call match_here(ipat(remain: ), text(i: ), length)
319 if (length >= 0) then
320 length = length + i - 1
321 return
322 end if
323 end do
324 length = -1
325 end subroutine match_here
326
350 logical function hit(ipat, c) result(result)
351 integer, intent(in) :: ipat(:)
352 character(len=*), intent(in) :: c
353 character(len=*), parameter :: &
354 & DIGIT = "0123456789", &
355 & XDIGIT = "ABCDEFabcdef", &
356 & ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
357 integer:: i
358 continue
359 do, i = 1, size(ipat)
360 select case(ipat(i))
361 case(sym_anychar)
362 result = .true.
363 case(sym_isalpha)
364 result = (index(alpha, c) > 0)
365 case(sym_isdigit)
366 result = (index(digit, c) > 0)
367 case(sym_isword)
368 result = (index(digit, c) > 0 .or. index(alpha, c) > 0 .or. &
369 & c == '_')
370 case(sym_isxdigit)
371 result = (index(digit, c) > 0 .or. index(xdigit, c) > 0)
372 case(sym_isspace)
373 result = (c == ' ' .or. (iachar(c) >= 8 .and. iachar(c) <= 13))
374 case default
375 result = (ipat(i) == ichar(c))
376 end select
377 if (result) return
378 end do
379 result = .false.
380 end function hit
381
468 subroutine match(pattern, text, start, length)
469 implicit none
470 character(len = *), intent(in) :: pattern, text
471 integer, intent(out) :: start, length
472 integer, allocatable :: ipattern(:)
473 integer :: text_length
474 continue
475 ! 空 pattern は空文字列に適合
476 if (len(pattern) <= 0) then
477 length = 0
478 start = 1
479 return
480 endif
481 ! メタキャラクタの認識
482 allocate(ipattern(len(pattern) + 2))
483 call preprocess_pattern(pattern, ipattern)
484 ! 頭寄せ指定のある場合
485 if (ipattern(1) == sym_headfix) then
486 start = 1
487 call match_here(ipattern(2: ), text, length)
488 if (length < 0) goto 995
489 goto 999
490 end if
491 ! 最左原理
492 text_length = len(text)
493 do, start = 1, text_length + 1
494 call match_here(ipattern, text(start:text_length), length)
495 if (length >= 0) goto 999
496 end do
497 ! みつからない場合
498995 continue
499 start = 0
500 length = -1
501999 continue
502 deallocate(ipattern)
503 end subroutine match
504
505end module dc_regex
シンプルな正規表現関数 'match' を提供します.
Definition dc_regex.f90:62
subroutine, public match(pattern, text, start, length)
Definition dc_regex.f90:469