-
Notifications
You must be signed in to change notification settings - Fork 5
/
performance_model.py
136 lines (101 loc) · 4.42 KB
/
performance_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
"""
import re
from os.path import join
import pandas as pd
from configuration import DATA_PATH
from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\
, build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX, NEAR_ENOUGH, VERB_E_SUFFIX
from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier
# Not-sure list: placeholder for terms whose value is still undecided (the block below is currently empty).
"""
"""
def _near_time(prefix):
    """Return *prefix* joined through NEAR_ENOUGH to the word "time"/"times"."""
    return prefix + NEAR_ENOUGH + 'time(?:s)?'


# Regex fragments counted as positive evidence by is_performance().
# Order is kept as-is because the list is handed to build_separated_terms().
#
# Candidates that are currently disabled (kept for reference):
#   '(cpu|gpu|tpu)'
#   'day(?:s)?'
#   'hour(?:s)?'
#   'minute(?:s)?'
#   'second(?:s)?'
#   'run(?: |-)?time(?:s)?'
positive_terms = [
    _near_time('better'),
    '(fast|faster|fastest)',
    _near_time('improv' + VERB_E_SUFFIX),
    _near_time('(long|longer|short|shorter|above|least)'),
    'optimis' + VERB_E_SUFFIX,
    'optimisation',
    'optimiz' + VERB_E_SUFFIX,
    'optimization',
    'performance',
    _near_time('reduc' + VERB_E_SUFFIX),
    '(speed|speeding)',
    _near_time('tak' + VERB_E_SUFFIX),
    '(slow|slower|slowest)',
]
# Regex fragments whose matches are subtracted from the positive count in
# is_performance(), i.e. phrases that contain a positive term but should not
# count as evidence of a performance change.
excluded_terms = [
    # Raw string: "\." is a regex escape, not a Python string escape. In a
    # non-raw literal it triggers an invalid-escape-sequence warning.
    r'[a-z0-9/\.-]*fast/[a-z0-9/\.-]*',
    'optimize imports',  # Common as a command in IDEs like JetBrains
    'performance suite(?:s)?',
    'performance (testing|test|tests)',
    'renam' + VERB_E_SUFFIX + NEAR_ENOUGH + 'fast',
    'sometime(?:s)?',
    '(unnoticed|found)' + NEAR_ENOUGH + 'long time',
    'optimize (align|alignment|arrange|arrangement)',
]
def build_positive_regex():
    """Build the pattern for the performance indicator terms.

    Returns:
        The result of ``build_separated_terms`` applied to ``positive_terms``;
        this module uses it as the pattern argument of ``re.findall``.
    """
    positive_pattern = build_separated_terms(positive_terms)
    return positive_pattern
def build_excluded_regex():
    """Build the pattern for phrases that must not count as performance.

    Returns:
        The result of ``build_separated_terms`` applied to ``excluded_terms``;
        this module uses it as the pattern argument of ``re.findall``.
    """
    excluded_pattern = build_separated_terms(excluded_terms)
    return excluded_pattern
def build_not_positive_regex():
    """Build the "not positive" pattern derived from the positive pattern.

    Returns:
        The result of ``build_non_positive_linguistic`` applied to the
        positive pattern from ``build_positive_regex``.
    """
    positive_pattern = build_positive_regex()
    return build_non_positive_linguistic(positive_pattern)
def is_performance(commit_text):
    """Decide whether a commit message describes a performance change.

    The number of positive-term matches is reduced by the number of
    excluded-phrase matches and by the number of "not positive" matches;
    the message is classified as performance when anything is left.

    Args:
        commit_text: The commit message to scan. Matching is done with the
            patterns as built, so any case normalisation is up to the caller.

    Returns:
        True when the net match count is greater than zero, else False.
    """
    positive_hits = re.findall(build_positive_regex(), commit_text)
    excluded_hits = re.findall(build_excluded_regex(), commit_text)
    not_positive_hits = re.findall(build_not_positive_regex(), commit_text)

    net_evidence = len(positive_hits) - len(excluded_hits) - len(not_positive_hits)
    return net_evidence > 0
def performance_to_bq():
    """Print the BigQuery expression that combines the performance functions.

    The output is a commented header, then the calls to the core, excluded
    and not-positive functions (each preceded by its own comment line and
    separated by a " - " line), then a commented footer.
    """
    concept = 'performance'

    # (comment suffix, call template) in the order they are printed.
    sections = (
        (": Core", "{schema}.bq_core_{concept}(message)"),
        (": Excluded", "{schema}.bq_excluded_{concept}(message)"),
        (": not positive", "{schema}.bq_not_positive_{concept}(message)"),
    )

    print("# " + concept)
    for position, (suffix, call_template) in enumerate(sections):
        if position > 0:
            # The subtraction separator goes between calls, never before the first.
            print(" - ")
        print("# " + concept + suffix)
        print(call_template.format(schema=SCHEMA_NAME, concept=concept))
    print("# end - " + concept)
def print_concepts_functions_for_bq(commit: str = 'XXX'):
    """Print the BigQuery function definitions for the performance concept.

    One function is generated per regex builder (core, excluded, not
    positive), followed by the combined ``bq_performance`` function whose
    body is printed by ``performance_to_bq``.

    Args:
        commit: Passed through unchanged to ``generate_bq_function`` for
            every generated function.
    """
    concept = 'performance'
    concepts = {
        'core_' + concept: build_positive_regex,
        'excluded_' + concept: build_excluded_regex,
        'not_positive_' + concept: build_not_positive_regex,
    }

    for name, regex_func in concepts.items():
        print()
        # Bind this iteration's values as defaults: a plain closure over the
        # loop variable is late-binding and would see the last item if the
        # callable were ever invoked after the loop advanced.
        print_func = lambda regex_func=regex_func, name=name: print_logic_to_bq(
            regex_func=regex_func, concept=name)
        generate_bq_function(
            '{schema}.bq_{concept}'.format(schema=SCHEMA_NAME, concept=name),
            print_func,
            commit=commit)
        print()

    # The combined function that references the three functions above.
    generate_bq_function(
        '{schema}.bq_{concept}'.format(schema=SCHEMA_NAME, concept=concept),
        performance_to_bq,
        commit=commit)
    print()
def evaluate_performance_classifier():
    """Evaluate ``is_performance`` on the performance samples file.

    Delegates to ``evaluate_concept_classifier`` with the 'message' text
    column and the ``commit_performance_samples.csv`` file under ``DATA_PATH``.
    """
    samples_path = join(DATA_PATH, 'commit_performance_samples.csv')
    evaluate_concept_classifier(
        concept='performance',
        text_name='message',
        classification_function=is_performance,
        samples_file=samples_path,
    )
if __name__ == '__main__':
    # Print the BigQuery function definitions, passing this commit id through
    # to the generator.
    print_concepts_functions_for_bq(commit='37802ac3c316a3c7a3befcf774838f282cdf162a')

    # To evaluate the classifier on the samples file instead, enable:
    # evaluate_performance_classifier()

    # Quick manual check on a single lower-cased message.
    sample_message = """Update to optimize align/arrange buttons
""".lower()
    verdict = is_performance(sample_message)
    print("is performance", verdict)
    positive_hits = re.findall(build_positive_regex(), sample_message)
    print("performance in text", positive_hits)