mirror of
https://github.com/salesforce/CodeT5.git
synced 2024-10-01 06:35:38 -04:00
1185 lines
51 KiB
Python
1185 lines
51 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT license.
|
|
|
|
from tree_sitter import Language, Parser
|
|
from .utils import (remove_comments_and_docstrings,
|
|
tree_to_token_index,
|
|
index_to_code_token,
|
|
tree_to_variable_index)
|
|
|
|
|
|
def DFG_python(root_node, index_to_code, states):
    """Collect data-flow edges for a Python syntax subtree.

    Args:
        root_node: Syntax node to analyse (presumably a tree-sitter ``Node``;
            only ``type``, ``children``, ``start_point``, ``end_point`` and
            ``child_by_field_name`` are used).
        index_to_code: Maps a token's ``(start_point, end_point)`` to its
            ``(token_index, token_text)`` pair.
        states: Maps a variable name to the token indices where its current
            value was last bound.  The mapping passed in is not modified.

    Returns:
        ``(edges, states)``.  ``edges`` is a list of
        ``(token_text, token_index, relation, source_texts, source_indices)``
        tuples sorted by ``token_index``, where ``relation`` is
        ``'comesFrom'`` or ``'computedFrom'``.  ``states`` is the updated
        copy of the input mapping.
    """
    assignment = ['assignment', 'augmented_assignment', 'for_in_clause']
    if_statement = ['if_statement']
    for_statement = ['for_statement']
    while_statement = ['while_statement']
    do_first_statement = ['for_in_clause']
    def_statement = ['default_parameter']
    states = states.copy()
    node_type = root_node.type

    is_token = (not root_node.children
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Token whose text equals its node type (e.g. keywords and
            # punctuation): carries no data flow.
            return [], states
        if code in states:
            # Use of a known name: it comes from its recorded binding(s).
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            # First sighting of an identifier becomes its binding site.
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_python(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in assignment:
        if node_type == 'for_in_clause':
            right_nodes = [root_node.children[-1]]
            left_nodes = [root_node.child_by_field_name('left')]
        else:
            if root_node.child_by_field_name('right') is None:
                # Nothing is assigned (no 'right' field), so no data flows.
                return [], states
            left_nodes, right_nodes = _paired_operands_python(root_node)
        DFG = []
        # Evaluate every right-hand side before any target is rebound.
        for node in right_nodes:
            temp, states = DFG_python(node, index_to_code, states)
            DFG += temp
        DFG += _link_targets_python(left_nodes, right_nodes, index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        has_else = 'else' in node_type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in ['elif_clause', 'else_clause']:
                temp, current_states = DFG_python(child, index_to_code, current_states)
            else:
                # Alternative branches start from the pre-branch bindings.
                temp, branch_states = DFG_python(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        if not has_else:
            # Without an else the statement may be skipped entirely, so the
            # pre-branch bindings stay reachable.
            others_states.append(states)
        return sorted(DFG, key=lambda x: x[1]), _merge_states_python(others_states)

    if node_type in for_statement:
        DFG = []
        left_nodes, right_nodes = _paired_operands_python(root_node)
        # The loop is walked twice; presumably so that uses early in the body
        # can see bindings made later in a previous iteration.
        for _ in range(2):
            for node in right_nodes:
                temp, states = DFG_python(node, index_to_code, states)
                DFG += temp
            DFG += _link_targets_python(left_nodes, right_nodes, index_to_code, states)
            if root_node.children[-1].type == "block":
                temp, states = DFG_python(root_node.children[-1], index_to_code, states)
                DFG += temp
        return _merge_edges_python(DFG), states

    if node_type in while_statement:
        DFG = []
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_python(child, index_to_code, states)
                DFG += temp
        return _merge_edges_python(DFG), states

    DFG = []
    # Children listed in do_first_statement are visited before their
    # siblings, regardless of source order.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_python(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_python(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states


def _paired_operands_python(root_node):
    """Split the 'left'/'right' fields of a node into matching operand lists.

    Comma-separated operands are paired element-wise; when the counts differ
    (or a side has no children) the whole field node is used instead.
    """
    left = root_node.child_by_field_name('left')
    right = root_node.child_by_field_name('right')
    left_nodes = [x for x in left.children if x.type != ',']
    right_nodes = [x for x in right.children if x.type != ',']
    if len(right_nodes) != len(left_nodes):
        left_nodes, right_nodes = [left], [right]
    if not left_nodes:
        left_nodes = [left]
    if not right_nodes:
        right_nodes = [right]
    return left_nodes, right_nodes


def _link_targets_python(left_nodes, right_nodes, index_to_code, states):
    """Emit 'computedFrom' edges for each (target, source) operand pair.

    Every variable token of a target is linked to all variable tokens of its
    paired source and recorded in ``states`` (updated in place) as that
    variable's current binding.
    """
    edges = []
    for left_node, right_node in zip(left_nodes, right_nodes):
        left_tokens_index = tree_to_variable_index(left_node, index_to_code)
        right_tokens_index = tree_to_variable_index(right_node, index_to_code)
        for token_index in left_tokens_index:
            idx, code = index_to_code[token_index]
            edges.append((code, idx, 'computedFrom',
                          [index_to_code[x][1] for x in right_tokens_index],
                          [index_to_code[x][0] for x in right_tokens_index]))
            states[code] = [idx]
    return edges


def _merge_states_python(branch_states):
    """Union the bindings of several control-flow branches per variable."""
    merged = {}
    for branch in branch_states:
        for name, indexes in branch.items():
            merged.setdefault(name, []).extend(indexes)
    return {name: sorted(set(indexes)) for name, indexes in merged.items()}


def _merge_edges_python(edges):
    """Collapse edges sharing (token, index, relation) into one edge each.

    Source names are de-duplicated in first-seen order so the result does not
    depend on string hash randomisation; source indices are sorted.  The
    merged edges are returned sorted by token index.
    """
    merged = {}
    for code, idx, relation, source_names, source_indexes in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [source_names, source_indexes]
        else:
            names, indexes = merged[key]
            merged[key] = [list(dict.fromkeys(names + source_names)),
                           sorted(set(indexes + source_indexes))]
    return [(key[0], key[1], key[2], value[0], value[1])
            for key, value in sorted(merged.items(), key=lambda item: item[0][1])]
|
|
|
|
|
|
def DFG_java(root_node, index_to_code, states):
    """Collect data-flow edges for a Java syntax subtree.

    Args:
        root_node: Syntax node to analyse (presumably a tree-sitter ``Node``;
            only ``type``, ``children``, ``start_point``, ``end_point`` and
            ``child_by_field_name`` are used).
        index_to_code: Maps a token's ``(start_point, end_point)`` to its
            ``(token_index, token_text)`` pair.
        states: Maps a variable name to the token indices where its current
            value was last bound.  The mapping passed in is not modified.

    Returns:
        ``(edges, states)``.  ``edges`` is a list of
        ``(token_text, token_index, relation, source_texts, source_indices)``
        tuples sorted by ``token_index``, where ``relation`` is
        ``'comesFrom'`` or ``'computedFrom'``.  ``states`` is the updated
        copy of the input mapping.
    """
    assignment = ['assignment_expression']
    def_statement = ['variable_declarator']
    increment_statement = ['update_expression']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    enhanced_for_statement = ['enhanced_for_statement']
    while_statement = ['while_statement']
    do_first_statement = []
    states = states.copy()
    node_type = root_node.type

    is_token = (not root_node.children
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Token whose text equals its node type (e.g. keywords and
            # punctuation): carries no data flow.
            return [], states
        if code in states:
            # Use of a known name: it comes from its recorded binding(s).
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            # First sighting of an identifier becomes its binding site.
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        if value is None:
            # Declaration without initialiser: bind the name with no source.
            DFG = []
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_java(value, index_to_code, states)
        DFG = temp + _link_each_java(name_indexs, value_indexs, 'comesFrom',
                                     index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in assignment:
        left_node = root_node.child_by_field_name('left')
        right_node = root_node.child_by_field_name('right')
        # Evaluate the right-hand side before the target is rebound.
        temp, states = DFG_java(right_node, index_to_code, states)
        name_indexs = tree_to_variable_index(left_node, index_to_code)
        value_indexs = tree_to_variable_index(right_node, index_to_code)
        DFG = temp + _link_each_java(name_indexs, value_indexs, 'computedFrom',
                                     index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in increment_statement:
        # The updated variable is computed from itself.
        indexs = tree_to_variable_index(root_node, index_to_code)
        DFG = _link_each_java(indexs, indexs, 'computedFrom', index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        has_else = 'else' in node_type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_java(child, index_to_code, current_states)
            else:
                # From the first alternative onwards every child is analysed
                # against the pre-branch bindings.
                in_alternative = True
                temp, branch_states = DFG_java(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        if not has_else:
            # Without an else the statement may be skipped entirely, so the
            # pre-branch bindings stay reachable.
            others_states.append(states)
        return sorted(DFG, key=lambda x: x[1]), _merge_states_java(others_states)

    if node_type in for_statement:
        DFG = []
        for child in root_node.children:
            temp, states = DFG_java(child, index_to_code, states)
            DFG += temp
        # Second pass over everything after the loop-variable declaration,
        # so the initialiser itself is analysed only once.
        past_declaration = False
        for child in root_node.children:
            if past_declaration:
                temp, states = DFG_java(child, index_to_code, states)
                DFG += temp
            elif child.type == "local_variable_declaration":
                past_declaration = True
        return _merge_edges_java(DFG), states

    if node_type in enhanced_for_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        body = root_node.child_by_field_name('body')
        DFG = []
        # The loop is walked twice; presumably so that uses early in the body
        # can see bindings made later in a previous iteration.
        for _ in range(2):
            temp, states = DFG_java(value, index_to_code, states)
            DFG += temp
            name_indexs = tree_to_variable_index(name, index_to_code)
            value_indexs = tree_to_variable_index(value, index_to_code)
            DFG += _link_each_java(name_indexs, value_indexs, 'computedFrom',
                                   index_to_code, states)
            temp, states = DFG_java(body, index_to_code, states)
            DFG += temp
        return _merge_edges_java(DFG), states

    if node_type in while_statement:
        DFG = []
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_java(child, index_to_code, states)
                DFG += temp
        return _merge_edges_java(DFG), states

    DFG = []
    # Children listed in do_first_statement (none for Java) are visited
    # before their siblings.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_java(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_java(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states


def _link_each_java(target_indexs, source_indexs, relation, index_to_code, states):
    """Emit one edge per (target token, source token) pair.

    Each target is also recorded in ``states`` (updated in place) as the
    current binding of its variable, even when there are no sources.
    """
    edges = []
    for target in target_indexs:
        idx1, code1 = index_to_code[target]
        for source in source_indexs:
            idx2, code2 = index_to_code[source]
            edges.append((code1, idx1, relation, [code2], [idx2]))
        states[code1] = [idx1]
    return edges


def _merge_states_java(branch_states):
    """Union the bindings of several control-flow branches per variable."""
    merged = {}
    for branch in branch_states:
        for name, indexes in branch.items():
            merged.setdefault(name, []).extend(indexes)
    return {name: sorted(set(indexes)) for name, indexes in merged.items()}


def _merge_edges_java(edges):
    """Collapse edges sharing (token, index, relation) into one edge each.

    Source names are de-duplicated in first-seen order so the result does not
    depend on string hash randomisation; source indices are sorted.  The
    merged edges are returned sorted by token index.
    """
    merged = {}
    for code, idx, relation, source_names, source_indexes in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [source_names, source_indexes]
        else:
            names, indexes = merged[key]
            merged[key] = [list(dict.fromkeys(names + source_names)),
                           sorted(set(indexes + source_indexes))]
    return [(key[0], key[1], key[2], value[0], value[1])
            for key, value in sorted(merged.items(), key=lambda item: item[0][1])]
|
|
|
|
def DFG_csharp(root_node, index_to_code, states):
    """Collect data-flow edges for a C# syntax subtree.

    Args:
        root_node: Syntax node to analyse (presumably a tree-sitter ``Node``;
            only ``type``, ``children``, ``start_point``, ``end_point`` and
            ``child_by_field_name`` are used).
        index_to_code: Maps a token's ``(start_point, end_point)`` to its
            ``(token_index, token_text)`` pair.
        states: Maps a variable name to the token indices where its current
            value was last bound.  The mapping passed in is not modified.

    Returns:
        ``(edges, states)``.  ``edges`` is a list of
        ``(token_text, token_index, relation, source_texts, source_indices)``
        tuples sorted by ``token_index``, where ``relation`` is
        ``'comesFrom'`` or ``'computedFrom'``.  ``states`` is the updated
        copy of the input mapping.
    """
    assignment = ['assignment_expression']
    def_statement = ['variable_declarator']
    increment_statement = ['postfix_unary_expression']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    enhanced_for_statement = ['for_each_statement']
    while_statement = ['while_statement']
    do_first_statement = []
    states = states.copy()
    node_type = root_node.type

    is_token = (not root_node.children
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Token whose text equals its node type (e.g. keywords and
            # punctuation): carries no data flow.
            return [], states
        if code in states:
            # Use of a known name: it comes from its recorded binding(s).
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            # First sighting of an identifier becomes its binding site.
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        # The declarator is read positionally: the first child is the name
        # and, only when there are exactly two children, the second is the
        # initialiser.
        name = root_node.children[0]
        value = root_node.children[1] if len(root_node.children) == 2 else None
        if value is None:
            DFG = []
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_csharp(value, index_to_code, states)
        DFG = temp + _link_each_csharp(name_indexs, value_indexs, 'comesFrom',
                                       index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in assignment:
        left_node = root_node.child_by_field_name('left')
        right_node = root_node.child_by_field_name('right')
        # Evaluate the right-hand side before the target is rebound.
        temp, states = DFG_csharp(right_node, index_to_code, states)
        name_indexs = tree_to_variable_index(left_node, index_to_code)
        value_indexs = tree_to_variable_index(right_node, index_to_code)
        DFG = temp + _link_each_csharp(name_indexs, value_indexs, 'computedFrom',
                                       index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in increment_statement:
        # The updated variable is computed from itself.
        indexs = tree_to_variable_index(root_node, index_to_code)
        DFG = _link_each_csharp(indexs, indexs, 'computedFrom', index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        has_else = 'else' in node_type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_csharp(child, index_to_code, current_states)
            else:
                # From the first alternative onwards every child is analysed
                # against the pre-branch bindings.
                in_alternative = True
                temp, branch_states = DFG_csharp(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        if not has_else:
            # Without an else the statement may be skipped entirely, so the
            # pre-branch bindings stay reachable.
            others_states.append(states)
        return sorted(DFG, key=lambda x: x[1]), _merge_states_csharp(others_states)

    if node_type in for_statement:
        DFG = []
        for child in root_node.children:
            temp, states = DFG_csharp(child, index_to_code, states)
            DFG += temp
        # Second pass over everything after the loop-variable declaration.
        # NOTE(review): the node type string is kept as-is; confirm it
        # matches what the C# grammar emits for a for-loop initialiser.
        past_declaration = False
        for child in root_node.children:
            if past_declaration:
                temp, states = DFG_csharp(child, index_to_code, states)
                DFG += temp
            elif child.type == "local_variable_declaration":
                past_declaration = True
        return _merge_edges_csharp(DFG), states

    if node_type in enhanced_for_statement:
        name = root_node.child_by_field_name('left')
        value = root_node.child_by_field_name('right')
        body = root_node.child_by_field_name('body')
        DFG = []
        # The loop is walked twice; presumably so that uses early in the body
        # can see bindings made later in a previous iteration.
        for _ in range(2):
            temp, states = DFG_csharp(value, index_to_code, states)
            DFG += temp
            name_indexs = tree_to_variable_index(name, index_to_code)
            value_indexs = tree_to_variable_index(value, index_to_code)
            DFG += _link_each_csharp(name_indexs, value_indexs, 'computedFrom',
                                     index_to_code, states)
            temp, states = DFG_csharp(body, index_to_code, states)
            DFG += temp
        return _merge_edges_csharp(DFG), states

    if node_type in while_statement:
        DFG = []
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_csharp(child, index_to_code, states)
                DFG += temp
        return _merge_edges_csharp(DFG), states

    DFG = []
    # Children listed in do_first_statement (none for C#) are visited before
    # their siblings.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_csharp(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_csharp(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states


def _link_each_csharp(target_indexs, source_indexs, relation, index_to_code, states):
    """Emit one edge per (target token, source token) pair.

    Each target is also recorded in ``states`` (updated in place) as the
    current binding of its variable, even when there are no sources.
    """
    edges = []
    for target in target_indexs:
        idx1, code1 = index_to_code[target]
        for source in source_indexs:
            idx2, code2 = index_to_code[source]
            edges.append((code1, idx1, relation, [code2], [idx2]))
        states[code1] = [idx1]
    return edges


def _merge_states_csharp(branch_states):
    """Union the bindings of several control-flow branches per variable."""
    merged = {}
    for branch in branch_states:
        for name, indexes in branch.items():
            merged.setdefault(name, []).extend(indexes)
    return {name: sorted(set(indexes)) for name, indexes in merged.items()}


def _merge_edges_csharp(edges):
    """Collapse edges sharing (token, index, relation) into one edge each.

    Source names are de-duplicated in first-seen order so the result does not
    depend on string hash randomisation; source indices are sorted.  The
    merged edges are returned sorted by token index.
    """
    merged = {}
    for code, idx, relation, source_names, source_indexes in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [source_names, source_indexes]
        else:
            names, indexes = merged[key]
            merged[key] = [list(dict.fromkeys(names + source_names)),
                           sorted(set(indexes + source_indexes))]
    return [(key[0], key[1], key[2], value[0], value[1])
            for key, value in sorted(merged.items(), key=lambda item: item[0][1])]
|
|
|
|
|
|
|
|
|
|
def DFG_ruby(root_node, index_to_code, states):
    """Collect data-flow edges for a Ruby syntax subtree.

    Args:
        root_node: Syntax node to analyse (presumably a tree-sitter ``Node``;
            only ``type``, ``children``, ``start_point``, ``end_point`` and
            ``child_by_field_name`` are used).
        index_to_code: Maps a token's ``(start_point, end_point)`` to its
            ``(token_index, token_text)`` pair.
        states: Maps a variable name to the token indices where its current
            value was last bound.  The mapping passed in is not modified.

    Returns:
        ``(edges, states)``.  ``edges`` is a list of
        ``(token_text, token_index, relation, source_texts, source_indices)``
        tuples sorted by ``token_index``, where ``relation`` is
        ``'comesFrom'`` or ``'computedFrom'``.  ``states`` is the updated
        copy of the input mapping.
    """
    assignment = ['assignment', 'operator_assignment']
    if_statement = ['if', 'elsif', 'else', 'unless', 'when']
    for_statement = ['for']
    while_statement = ['while_modifier', 'until']
    do_first_statement = []
    def_statement = ['keyword_parameter']
    # Copy up front in every branch (not only for tokens) so that the
    # definition/assignment/for branches never write into the caller's dict.
    states = states.copy()
    node_type = root_node.type

    is_token = (not root_node.children
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Token whose text equals its node type (e.g. keywords and
            # punctuation): carries no data flow.
            return [], states
        if code in states:
            # Use of a known name: it comes from its recorded binding(s).
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            # First sighting of an identifier becomes its binding site.
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_ruby(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in assignment:
        left_nodes, right_nodes = _paired_operands_ruby(root_node)
        if node_type == "operator_assignment":
            # Operator assignments are read positionally: the target is the
            # first child and the operand is the last.
            left_nodes = [root_node.children[0]]
            right_nodes = [root_node.children[-1]]
        DFG = []
        # Evaluate every right-hand side before any target is rebound.
        for node in right_nodes:
            temp, states = DFG_ruby(node, index_to_code, states)
            DFG += temp
        DFG += _link_targets_ruby(left_nodes, right_nodes, index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        has_else = 'else' in node_type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement:
                temp, current_states = DFG_ruby(child, index_to_code, current_states)
            else:
                # Alternative branches start from the pre-branch bindings.
                temp, branch_states = DFG_ruby(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        if not has_else:
            # Without an else the statement may be skipped entirely, so the
            # pre-branch bindings stay reachable.
            others_states.append(states)
        return sorted(DFG, key=lambda x: x[1]), _merge_states_ruby(others_states)

    if node_type in for_statement:
        DFG = []
        left_nodes = [root_node.child_by_field_name('pattern')]
        right_nodes = [root_node.child_by_field_name('value')]
        # The loop is walked twice; presumably so that uses early in the body
        # can see bindings made later in a previous iteration.
        for _ in range(2):
            for node in right_nodes:
                temp, states = DFG_ruby(node, index_to_code, states)
                DFG += temp
            DFG += _link_targets_ruby(left_nodes, right_nodes, index_to_code, states)
            temp, states = DFG_ruby(root_node.child_by_field_name('body'), index_to_code, states)
            DFG += temp
        return _merge_edges_ruby(DFG), states

    if node_type in while_statement:
        DFG = []
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_ruby(child, index_to_code, states)
                DFG += temp
        return _merge_edges_ruby(DFG), states

    DFG = []
    # Children listed in do_first_statement (none for Ruby) are visited
    # before their siblings.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_ruby(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_ruby(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states


def _paired_operands_ruby(root_node):
    """Split the 'left'/'right' fields of a node into matching operand lists.

    Comma-separated operands are paired element-wise; when the counts differ
    (or a side has no children) the whole field node is used instead.
    """
    left = root_node.child_by_field_name('left')
    right = root_node.child_by_field_name('right')
    left_nodes = [x for x in left.children if x.type != ',']
    right_nodes = [x for x in right.children if x.type != ',']
    if len(right_nodes) != len(left_nodes):
        left_nodes, right_nodes = [left], [right]
    if not left_nodes:
        left_nodes = [left]
    if not right_nodes:
        right_nodes = [right]
    return left_nodes, right_nodes


def _link_targets_ruby(left_nodes, right_nodes, index_to_code, states):
    """Emit 'computedFrom' edges for each (target, source) operand pair.

    Every variable token of a target is linked to all variable tokens of its
    paired source and recorded in ``states`` (updated in place) as that
    variable's current binding.
    """
    edges = []
    for left_node, right_node in zip(left_nodes, right_nodes):
        left_tokens_index = tree_to_variable_index(left_node, index_to_code)
        right_tokens_index = tree_to_variable_index(right_node, index_to_code)
        for token_index in left_tokens_index:
            idx, code = index_to_code[token_index]
            edges.append((code, idx, 'computedFrom',
                          [index_to_code[x][1] for x in right_tokens_index],
                          [index_to_code[x][0] for x in right_tokens_index]))
            states[code] = [idx]
    return edges


def _merge_states_ruby(branch_states):
    """Union the bindings of several control-flow branches per variable."""
    merged = {}
    for branch in branch_states:
        for name, indexes in branch.items():
            merged.setdefault(name, []).extend(indexes)
    return {name: sorted(set(indexes)) for name, indexes in merged.items()}


def _merge_edges_ruby(edges):
    """Collapse edges sharing (token, index, relation) into one edge each.

    Source names are de-duplicated in first-seen order so the result does not
    depend on string hash randomisation; source indices are sorted.  The
    merged edges are returned sorted by token index.
    """
    merged = {}
    for code, idx, relation, source_names, source_indexes in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [source_names, source_indexes]
        else:
            names, indexes = merged[key]
            merged[key] = [list(dict.fromkeys(names + source_names)),
                           sorted(set(indexes + source_indexes))]
    return [(key[0], key[1], key[2], value[0], value[1])
            for key, value in sorted(merged.items(), key=lambda item: item[0][1])]
|
|
|
|
def DFG_go(root_node, index_to_code, states):
    """Collect data-flow edges for a Go syntax subtree.

    Args:
        root_node: Syntax node to analyse (presumably a tree-sitter ``Node``;
            only ``type``, ``children``, ``start_point``, ``end_point`` and
            ``child_by_field_name`` are used).
        index_to_code: Maps a token's ``(start_point, end_point)`` to its
            ``(token_index, token_text)`` pair.
        states: Maps a variable name to the token indices where its current
            value was last bound.  The mapping passed in is not modified.

    Returns:
        ``(edges, states)``.  ``edges`` is a list of
        ``(token_text, token_index, relation, source_texts, source_indices)``
        tuples sorted by ``token_index``, where ``relation`` is
        ``'comesFrom'`` or ``'computedFrom'``.  ``states`` is the updated
        copy of the input mapping.
    """
    assignment = ['assignment_statement']
    def_statement = ['var_spec']
    increment_statement = ['inc_statement']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    do_first_statement = []
    states = states.copy()
    node_type = root_node.type

    is_token = (not root_node.children
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Token whose text equals its node type (e.g. keywords and
            # punctuation): carries no data flow.
            return [], states
        if code in states:
            # Use of a known name: it comes from its recorded binding(s).
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            # First sighting of an identifier becomes its binding site.
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        if value is None:
            # Declaration without initialiser: bind the name with no source.
            DFG = []
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_go(value, index_to_code, states)
        DFG = temp + _link_each_go(name_indexs, value_indexs, 'comesFrom',
                                   index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in assignment:
        left_node = root_node.child_by_field_name('left')
        right_node = root_node.child_by_field_name('right')
        # Evaluate the right-hand side before the target is rebound.
        temp, states = DFG_go(right_node, index_to_code, states)
        name_indexs = tree_to_variable_index(left_node, index_to_code)
        value_indexs = tree_to_variable_index(right_node, index_to_code)
        DFG = temp + _link_each_go(name_indexs, value_indexs, 'computedFrom',
                                   index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in increment_statement:
        # The updated variable is computed from itself.
        indexs = tree_to_variable_index(root_node, index_to_code)
        DFG = _link_each_go(indexs, indexs, 'computedFrom', index_to_code, states)
        return sorted(DFG, key=lambda x: x[1]), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_go(child, index_to_code, current_states)
            else:
                # From the first alternative onwards every child is analysed
                # against the pre-branch bindings.
                in_alternative = True
                temp, branch_states = DFG_go(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        # For Go the pre-branch bindings are always merged into the result,
        # whether or not an else branch is present.
        others_states.append(states)
        return sorted(DFG, key=lambda x: x[1]), _merge_states_go(others_states)

    if node_type in for_statement:
        DFG = []
        for child in root_node.children:
            temp, states = DFG_go(child, index_to_code, states)
            DFG += temp
        # Second pass: re-run the clause's update statement (if any) and
        # everything that follows the clause.
        past_clause = False
        for child in root_node.children:
            if past_clause:
                temp, states = DFG_go(child, index_to_code, states)
                DFG += temp
            elif child.type == "for_clause":
                update = child.child_by_field_name('update')
                if update is not None:
                    temp, states = DFG_go(update, index_to_code, states)
                    DFG += temp
                past_clause = True
        return _merge_edges_go(DFG), states

    DFG = []
    # Children listed in do_first_statement (none for Go) are visited before
    # their siblings.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_go(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_go(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states


def _link_each_go(target_indexs, source_indexs, relation, index_to_code, states):
    """Emit one edge per (target token, source token) pair.

    Each target is also recorded in ``states`` (updated in place) as the
    current binding of its variable, even when there are no sources.
    """
    edges = []
    for target in target_indexs:
        idx1, code1 = index_to_code[target]
        for source in source_indexs:
            idx2, code2 = index_to_code[source]
            edges.append((code1, idx1, relation, [code2], [idx2]))
        states[code1] = [idx1]
    return edges


def _merge_states_go(branch_states):
    """Union the bindings of several control-flow branches per variable."""
    merged = {}
    for branch in branch_states:
        for name, indexes in branch.items():
            merged.setdefault(name, []).extend(indexes)
    return {name: sorted(set(indexes)) for name, indexes in merged.items()}


def _merge_edges_go(edges):
    """Collapse edges sharing (token, index, relation) into one edge each.

    Source names are de-duplicated in first-seen order so the result does not
    depend on string hash randomisation; source indices are sorted.  The
    merged edges are returned sorted by token index.
    """
    merged = {}
    for code, idx, relation, source_names, source_indexes in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [source_names, source_indexes]
        else:
            names, indexes = merged[key]
            merged[key] = [list(dict.fromkeys(names + source_names)),
                           sorted(set(indexes + source_indexes))]
    return [(key[0], key[1], key[2], value[0], value[1])
            for key, value in sorted(merged.items(), key=lambda item: item[0][1])]
|
|
|
|
|
|
|
|
|
|
def DFG_php(root_node, index_to_code, states):
    """Collect data-flow edges for a PHP syntax (sub)tree.

    The function walks ``root_node`` recursively and dispatches on
    ``root_node.type``.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used (presumably a tree-sitter ``Node`` -- confirm with caller).
        index_to_code: Maps ``(start_point, end_point)`` of a token to an
            ``(idx, code)`` pair.
        states: Maps a token's text to the list of token indices that
            currently define it. The dict is shallow-copied before use.

    Returns:
        A pair ``(edges, states)``. ``edges`` is a list of
        ``(code, idx, relation, source_codes, source_idxs)`` tuples sorted
        by ``idx``, where ``relation`` is ``'comesFrom'`` or
        ``'computedFrom'``. ``states`` is the updated definition map.
    """
    assignment = ('assignment_expression', 'augmented_assignment_expression')
    def_statement = ('simple_parameter',)
    increment_statement = ('update_expression',)
    if_statement = ('if_statement', 'else_clause')
    for_statement = ('for_statement',)
    enhanced_for_statement = ('foreach_statement',)
    while_statement = ('while_statement',)
    do_first_statement = ()
    states = states.copy()

    # Leaf tokens (and string literals, which are treated as one token).
    is_token = (len(root_node.children) == 0
                or root_node.type in ('string_literal', 'string', 'character_literal'))
    if is_token and root_node.type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Keywords/punctuation: the node type equals its own text.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('default_value')
        dfg = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                dfg.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(dfg, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_php(value, index_to_code, states)
        dfg += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        dfg = []
        # The right-hand side is walked first so its uses see the old states.
        temp, states = DFG_php(right_nodes, index_to_code, states)
        dfg += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in increment_statement:
        dfg = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        # Every variable in the update expression is computed from all of them.
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in if_statement:
        dfg = []
        current_states = states.copy()
        others_states = []
        branch_seen = False
        for child in root_node.children:
            if child.type not in if_statement and not branch_seen:
                # Condition and first body: states flow on sequentially.
                temp, current_states = DFG_php(child, index_to_code, current_states)
                dfg += temp
            else:
                # Alternative branches restart from the pre-branch states.
                branch_seen = True
                temp, branch_states = DFG_php(child, index_to_code, states)
                dfg += temp
                others_states.append(branch_states)
        others_states.append(current_states)
        # Union the definitions of every branch with the pre-branch states.
        new_states = {}
        for branch in others_states + [states]:
            for key, idxs in branch.items():
                new_states.setdefault(key, []).extend(idxs)
        new_states = {key: sorted(set(idxs)) for key, idxs in new_states.items()}
        return sorted(dfg, key=lambda x: x[1]), new_states

    if root_node.type in for_statement:
        dfg = []
        for child in root_node.children:
            temp, states = DFG_php(child, index_to_code, states)
            dfg += temp
        # Second pass over everything after the first assignment expression,
        # so that those children see definitions made during the first pass.
        revisit = False
        for child in root_node.children:
            if revisit:
                temp, states = DFG_php(child, index_to_code, states)
                dfg += temp
            elif child.type == "assignment_expression":
                revisit = True
        merged = {}
        for code, idx, relation, src_codes, src_idxs in dfg:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [src_codes, src_idxs]
            else:
                merged[key][0] = list(set(merged[key][0] + src_codes))
                merged[key][1] = sorted(set(merged[key][1] + src_idxs))
        dfg = [(k[0], k[1], k[2], v[0], v[1])
               for k, v in sorted(merged.items(), key=lambda t: t[0][1])]
        return dfg, states

    if root_node.type in enhanced_for_statement:
        # The first ``variable_name`` child is the iterated value, the second
        # one is the loop variable.
        # NOTE(review): if fewer than two such children exist, ``value`` or
        # ``name`` stays None; DFG_php(None, ...) then fails on ``.children``.
        # Confirm against the grammar (e.g. ``$k => $v`` pairs).
        name = None
        value = None
        for child in root_node.children:
            if child.type != 'variable_name':
                continue
            if value is None:
                value = child
            else:
                name = child
                break
        body = root_node.child_by_field_name('body')
        dfg = []
        # Two passes: the second one sees definitions made by the first.
        for _ in range(2):
            temp, states = DFG_php(value, index_to_code, states)
            dfg += temp
            name_indexs = tree_to_variable_index(name, index_to_code)
            value_indexs = tree_to_variable_index(value, index_to_code)
            for index1 in name_indexs:
                idx1, code1 = index_to_code[index1]
                for index2 in value_indexs:
                    idx2, code2 = index_to_code[index2]
                    dfg.append((code1, idx1, 'computedFrom', [code2], [idx2]))
                states[code1] = [idx1]
            temp, states = DFG_php(body, index_to_code, states)
            dfg += temp
        merged = {}
        for code, idx, relation, src_codes, src_idxs in dfg:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [src_codes, src_idxs]
            else:
                merged[key][0] = list(set(merged[key][0] + src_codes))
                merged[key][1] = sorted(set(merged[key][1] + src_idxs))
        dfg = [(k[0], k[1], k[2], v[0], v[1])
               for k, v in sorted(merged.items(), key=lambda t: t[0][1])]
        return dfg, states

    if root_node.type in while_statement:
        dfg = []
        # Two passes: the second one sees definitions made by the first.
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_php(child, index_to_code, states)
                dfg += temp
        merged = {}
        for code, idx, relation, src_codes, src_idxs in dfg:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [src_codes, src_idxs]
            else:
                merged[key][0] = list(set(merged[key][0] + src_codes))
                merged[key][1] = sorted(set(merged[key][1] + src_idxs))
        dfg = [(k[0], k[1], k[2], v[0], v[1])
               for k, v in sorted(merged.items(), key=lambda t: t[0][1])]
        return dfg, states

    # Any other node: walk "do first" children, then the rest, in order.
    dfg = []
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_php(child, index_to_code, states)
            dfg += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_php(child, index_to_code, states)
            dfg += temp
    return sorted(dfg, key=lambda x: x[1]), states
|
|
|
|
|
|
def DFG_javascript(root_node, index_to_code, states):
    """Collect data-flow edges for a JavaScript syntax (sub)tree.

    The function walks ``root_node`` recursively and dispatches on
    ``root_node.type``.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used (presumably a tree-sitter ``Node`` -- confirm with caller).
        index_to_code: Maps ``(start_point, end_point)`` of a token to an
            ``(idx, code)`` pair.
        states: Maps a token's text to the list of token indices that
            currently define it. The dict is shallow-copied before use.

    Returns:
        A pair ``(edges, states)``. ``edges`` is a list of
        ``(code, idx, relation, source_codes, source_idxs)`` tuples sorted
        by ``idx``, where ``relation`` is ``'comesFrom'`` or
        ``'computedFrom'``. ``states`` is the updated definition map.
    """
    assignment = ('assignment_pattern', 'augmented_assignment_expression')
    def_statement = ('variable_declarator',)
    increment_statement = ('update_expression',)
    if_statement = ('if_statement', 'else')
    for_statement = ('for_statement',)
    while_statement = ('while_statement',)
    do_first_statement = ()
    states = states.copy()

    # Leaf tokens (and string literals, which are treated as one token).
    is_token = (len(root_node.children) == 0
                or root_node.type in ('string_literal', 'string', 'character_literal'))
    if is_token and root_node.type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Keywords/punctuation: the node type equals its own text.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        dfg = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                dfg.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(dfg, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_javascript(value, index_to_code, states)
        dfg += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        dfg = []
        # The right-hand side is walked first so its uses see the old states.
        temp, states = DFG_javascript(right_nodes, index_to_code, states)
        dfg += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in increment_statement:
        dfg = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        # Every variable in the update expression is computed from all of them.
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                dfg.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(dfg, key=lambda x: x[1]), states

    if root_node.type in if_statement:
        dfg = []
        current_states = states.copy()
        others_states = []
        branch_seen = False
        for child in root_node.children:
            if child.type not in if_statement and not branch_seen:
                # Condition and first body: states flow on sequentially.
                temp, current_states = DFG_javascript(child, index_to_code, current_states)
                dfg += temp
            else:
                # Everything from the first else/nested-if child onwards
                # restarts from the pre-branch states.
                branch_seen = True
                temp, branch_states = DFG_javascript(child, index_to_code, states)
                dfg += temp
                others_states.append(branch_states)
        others_states.append(current_states)
        # Union the definitions of every branch with the pre-branch states.
        # The pre-branch states are always merged in, whether or not an
        # else branch was seen.
        new_states = {}
        for branch in others_states + [states]:
            for key, idxs in branch.items():
                new_states.setdefault(key, []).extend(idxs)
        new_states = {key: sorted(set(idxs)) for key, idxs in new_states.items()}
        return sorted(dfg, key=lambda x: x[1]), new_states

    if root_node.type in for_statement:
        dfg = []
        for child in root_node.children:
            temp, states = DFG_javascript(child, index_to_code, states)
            dfg += temp
        # Second pass over everything after the first variable declaration,
        # so that those children see definitions made during the first pass.
        revisit = False
        for child in root_node.children:
            if revisit:
                temp, states = DFG_javascript(child, index_to_code, states)
                dfg += temp
            elif child.type == "variable_declaration":
                revisit = True
        merged = {}
        for code, idx, relation, src_codes, src_idxs in dfg:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [src_codes, src_idxs]
            else:
                merged[key][0] = list(set(merged[key][0] + src_codes))
                merged[key][1] = sorted(set(merged[key][1] + src_idxs))
        dfg = [(k[0], k[1], k[2], v[0], v[1])
               for k, v in sorted(merged.items(), key=lambda t: t[0][1])]
        return dfg, states

    if root_node.type in while_statement:
        dfg = []
        # Two passes: the second one sees definitions made by the first.
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_javascript(child, index_to_code, states)
                dfg += temp
        merged = {}
        for code, idx, relation, src_codes, src_idxs in dfg:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [src_codes, src_idxs]
            else:
                merged[key][0] = list(set(merged[key][0] + src_codes))
                merged[key][1] = sorted(set(merged[key][1] + src_idxs))
        dfg = [(k[0], k[1], k[2], v[0], v[1])
               for k, v in sorted(merged.items(), key=lambda t: t[0][1])]
        return dfg, states

    # Any other node: walk "do first" children, then the rest, in order.
    dfg = []
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_javascript(child, index_to_code, states)
            dfg += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_javascript(child, index_to_code, states)
            dfg += temp
    return sorted(dfg, key=lambda x: x[1]), states
|
|
|
|
|
|
|