Optimise segmentation
Avoid the costly list append (`@`) on every iteration: cons each substring onto the accumulator and reverse the list once at the end.
This commit is contained in:
		
							parent
							
								
									67acf25cd7
								
							
						
					
					
						commit
						30bb11b988
					
				
					 1 changed files with 15 additions and 16 deletions
				
			
		|  | @ -33,23 +33,22 @@ let split_string_by_unicode_codepoint str = | ||||||
| (* Split an OCaml string [str] into a `string list` *) | (* Split an OCaml string [str] into a `string list` *) | ||||||
|   let pred_codepoint = ref (-1) in |   let pred_codepoint = ref (-1) in | ||||||
|   let segmented_unit_list = ref [] in |   let segmented_unit_list = ref [] in | ||||||
|   let iterator x y _ = |   let iterator () y _ = | ||||||
|     let _ = if  !pred_codepoint > -1 then |     let () = if  !pred_codepoint > -1 then | ||||||
|       let current_codepoint = y in |       let current_codepoint = y in | ||||||
|       let pred_char_len = current_codepoint - !pred_codepoint in |       let pred_char_len = current_codepoint - !pred_codepoint in | ||||||
|       let unit_substring = Stdlib.String.sub x !pred_codepoint pred_char_len in |       let unit_substring = Stdlib.String.sub str !pred_codepoint pred_char_len in | ||||||
|       let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in |       segmented_unit_list := unit_substring :: !segmented_unit_list | ||||||
|       unit_substring |     in | ||||||
|     else |     let () =  pred_codepoint := y in | ||||||
|       "" in |     () | ||||||
|     let _ =  pred_codepoint := y in x in |   in | ||||||
| 
 | 
 | ||||||
|   let _ = Uutf.String.fold_utf_8 iterator str str in |   let _ = Uutf.String.fold_utf_8 iterator () str in | ||||||
|   let last_char_len = (Stdlib.String.length str) - !pred_codepoint in |   let last_char_len = (Stdlib.String.length str) - !pred_codepoint in | ||||||
|   if last_char_len > 0 then |   let () = | ||||||
|     let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in |     if last_char_len > 0 then | ||||||
|     let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in |       let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in | ||||||
|     !segmented_unit_list |       segmented_unit_list := unit_substring :: !segmented_unit_list | ||||||
|   else |   in | ||||||
|     !segmented_unit_list;; |   List.rev !segmented_unit_list;; | ||||||
| 
 |  | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Raphaël Proust
						Raphaël Proust