Skip to content

Add Solidity to programming languages #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-php)
)
endif()

# Fetch the Solidity grammar on first configure, when the checkout directory
# does not exist yet. A failed clone is reported immediately instead of
# surfacing later as a missing grammar source file.
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-solidity)
  execute_process(
    COMMAND git clone https://github.com/JoranHonig/tree-sitter-solidity.git ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-solidity
    RESULT_VARIABLE TREE_SITTER_SOLIDITY_CLONE_RESULT
  )
  # RESULT_VARIABLE holds the exit code, or an error string if git could not
  # be launched at all; either way anything other than 0 is a failure.
  if(NOT TREE_SITTER_SOLIDITY_CLONE_RESULT EQUAL 0)
    message(FATAL_ERROR "Failed to clone tree-sitter-solidity (git result: ${TREE_SITTER_SOLIDITY_CLONE_RESULT})")
  endif()
endif()

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-cpp)
execute_process(
COMMAND git clone https://github.com/tree-sitter/tree-sitter-cpp.git ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-cpp
Expand Down
9 changes: 6 additions & 3 deletions scripts/mine_patterns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function print_usage() {
else
echo "[-n number_of_processes_to_use_for_mining] (default: num_cpus_on_system)"
fi
echo "[-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)"
echo "[-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity)"
echo "[-g github_repo_id] (default: 0) A unique identifier for GitHub repository, if any"
exit
}
Expand Down Expand Up @@ -43,9 +43,9 @@ then
print_usage $0
fi

if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 4 ));
if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 5 ));
then
echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), and 4 (C++) are supported languages; received ${LANGUAGE}"
echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity) are supported languages; received ${LANGUAGE}"
print_usage $0
fi

Expand All @@ -69,6 +69,9 @@ then
elif [ "${LANGUAGE}" = "4" ];
then
find "${TRAIN_DIR}" -iname "*.cpp" -o -iname "*.cc" -o -iname "*.cxx" -o -iname "*.h" -o -iname "*.hpp" -o -iname "*.hxx" -type f > ${FILE_LIST}
elif [ "${LANGUAGE}" = "5" ];
then
find "${TRAIN_DIR}" -iname "*.sol" -type f > ${FILE_LIST}
fi

SCRIPTS_DIR=`dirname $0`
Expand Down
9 changes: 6 additions & 3 deletions scripts/scan_for_anomalies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function print_usage() {
fi
echo " [-o output_log_dir] (default: /tmp)"
echo " [-a anomaly_threshold] (default: 3.0)"
echo " [-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)"
echo " [-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity)"

exit
}
Expand Down Expand Up @@ -58,9 +58,9 @@ then
print_usage $0
fi

if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 4 ));
if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 5 ));
then
echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), and 4 (C++) are supported languages; received ${LANGUAGE}"
echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity) are supported languages; received ${LANGUAGE}"
print_usage $0
fi

Expand All @@ -77,6 +77,9 @@ then
elif [ "${LANGUAGE}" = "4" ];
then
find "${SCAN_DIR}" -iname "*.cpp" -o -iname "*.cc" -o -iname "*.cxx" -o -iname "*.h" -o -iname "*.hpp" -o -iname "*.hxx" -type f > ${SCAN_FILE_LIST}
elif [ "${LANGUAGE}" = "5" ];
then
find "${SCAN_DIR}" -iname "*.sol" -type f > ${SCAN_FILE_LIST}
fi

SCRIPTS_DIR=`dirname $0`
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(COMMON_LINK_LIBRARIES
tree-sitter
tree-sitter-c
tree-sitter-php
tree-sitter-solidity
tree-sitter-cpp
tree-sitter-verilog
pthread
Expand Down
5 changes: 4 additions & 1 deletion src/cf_dump_code_blocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ int handle_command_args(int argc, char* argv[], CFDumpArgs& command_args) {
<< std::endl
<< " [-l source_language_number] (default: "
<< LANGUAGE_C << ")"
<< ", supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)"
<< ", supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (SOLIDITY)"
<< std::endl;
};

Expand Down Expand Up @@ -137,6 +137,9 @@ int main(int argc, char* argv[]) {
case LANGUAGE_PHP:
DumpCodeBlocksFromSourceFile<LANGUAGE_PHP>(command_args);
break;
case LANGUAGE_SOLIDITY:
DumpCodeBlocksFromSourceFile<LANGUAGE_SOLIDITY>(command_args);
break;
case LANGUAGE_CPP:
DumpCodeBlocksFromSourceFile<LANGUAGE_CPP>(command_args);
break;
Expand Down
6 changes: 5 additions & 1 deletion src/cf_file_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static int handle_command_args(int argc, char* argv[], FileScannerArgs& args) {
<< " [-a anomaly_threshold] (default: 3.0)"
<< std::endl
<< " [-l source_language_number] (default: 1 (C), "
<< "supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++))"
<< "supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (SOLIDITY) )"
<< std::endl
<< " [-v log_level ] (default: 0, "
<< "{ERROR, 0}, {INFO, 1}, {DEBUG, 2})"
Expand Down Expand Up @@ -162,6 +162,10 @@ int main(int argc, char* argv[]) {
status = train_and_scan_util.ScanFile<LANGUAGE_PHP>(eval_file,
log_file);
break;
case LANGUAGE_SOLIDITY:
status = train_and_scan_util.ScanFile<LANGUAGE_SOLIDITY>(eval_file,
log_file);
break;
case LANGUAGE_CPP:
status = train_and_scan_util.ScanFile<LANGUAGE_CPP>(eval_file,
log_file);
Expand Down
7 changes: 7 additions & 0 deletions src/common_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ ManagedTSTree GetTSTree<LANGUAGE_VERILOG>(const std::string&, bool);
template
ManagedTSTree GetTSTree<LANGUAGE_PHP>(const std::string&, bool);
template
ManagedTSTree GetTSTree<LANGUAGE_SOLIDITY>(const std::string&, bool);
template
ManagedTSTree GetTSTree<LANGUAGE_CPP>(const std::string&, bool);
template
ManagedTSTree GetTSTree<LANGUAGE_C>(const std::string&, std::string&);
Expand All @@ -135,6 +137,8 @@ ManagedTSTree GetTSTree<LANGUAGE_VERILOG>(const std::string&, std::string&);
template
ManagedTSTree GetTSTree<LANGUAGE_PHP>(const std::string&, std::string&);
template
ManagedTSTree GetTSTree<LANGUAGE_SOLIDITY>(const std::string&, std::string&);
template
ManagedTSTree GetTSTree<LANGUAGE_CPP>(const std::string&, std::string&);
template
void CollectCodeBlocksOfInterest<LANGUAGE_C>(const ManagedTSTree&,
Expand All @@ -146,5 +150,8 @@ template
void CollectCodeBlocksOfInterest<LANGUAGE_PHP>(const ManagedTSTree &,
code_blocks_t&);
template
void CollectCodeBlocksOfInterest<LANGUAGE_SOLIDITY>(const ManagedTSTree &,
code_blocks_t&);
template
void CollectCodeBlocksOfInterest<LANGUAGE_CPP>(const ManagedTSTree &,
code_blocks_t&);
18 changes: 17 additions & 1 deletion src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@
// Grammar entry points, declared with C linkage. Each one returns the
// TSLanguage for a single supported language; only the declarations appear
// here, the definitions live outside this header.
extern "C" const TSLanguage *tree_sitter_c();
extern "C" const TSLanguage *tree_sitter_verilog();
extern "C" const TSLanguage *tree_sitter_php();
extern "C" const TSLanguage *tree_sitter_solidity();
extern "C" const TSLanguage *tree_sitter_cpp();

enum Language {
LANGUAGE_C = 1,
LANGUAGE_VERILOG = 2,
LANGUAGE_PHP = 3,
LANGUAGE_CPP = 4
LANGUAGE_CPP = 4,
LANGUAGE_SOLIDITY = 5
};

#define LANGUAGE_MIN LANGUAGE_C
Expand Down Expand Up @@ -68,6 +70,10 @@ template <> inline const TSLanguage* GetTSLanguage<LANGUAGE_PHP> () {
return tree_sitter_php();
}

// Solidity specialization: returns the grammar handed out by
// tree_sitter_solidity().
template <>
inline const TSLanguage* GetTSLanguage<LANGUAGE_SOLIDITY>() {
  const TSLanguage* solidity_grammar = tree_sitter_solidity();
  return solidity_grammar;
}

template<Language L>
class ParserBase {
public:
Expand Down Expand Up @@ -126,6 +132,9 @@ template <> inline bool IsIfStatement<LANGUAGE_VERILOG>(const TSNode& node) {
// PHP specialization: delegates to IsTSNodeofType with the "if_statement"
// type name.
template <>
inline bool IsIfStatement<LANGUAGE_PHP>(const TSNode& node) {
  const bool is_if = IsTSNodeofType(node, "if_statement");
  return is_if;
}
// Solidity specialization: delegates to IsTSNodeofType with the
// "if_statement" type name.
template <>
inline bool IsIfStatement<LANGUAGE_SOLIDITY>(const TSNode& node) {
  const bool is_if = IsTSNodeofType(node, "if_statement");
  return is_if;
}
// C++ specialization: delegates to IsTSNodeofType with the "if_statement"
// type name.
template <>
inline bool IsIfStatement<LANGUAGE_CPP>(const TSNode& node) {
  const bool is_if = IsTSNodeofType(node, "if_statement");
  return is_if;
}
Expand Down Expand Up @@ -214,6 +223,13 @@ inline TSNode GetIfConditionNode<LANGUAGE_PHP>(const TSNode& if_statement) {
kIfCondition.c_str(), kIfCondition.length());
}

// Solidity specialization: asks tree-sitter for the child of `if_statement`
// registered under the "condition" field name and returns that node.
template <>
inline TSNode GetIfConditionNode<LANGUAGE_SOLIDITY>(
    const TSNode& if_statement) {
  const char kConditionField[] = "condition";
  // sizeof counts the trailing NUL, so subtract it to get the name length.
  return ts_node_child_by_field_name(if_statement, kConditionField,
                                     sizeof(kConditionField) - 1);
}

std::string OriginalSourceExpression(const TSNode&, const std::string&);

template <Language L>
Expand Down
4 changes: 4 additions & 0 deletions src/train_and_scan_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ template int TrainAndScanUtil::ScanFile<LANGUAGE_VERILOG>(
const std::string& test_file, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanFile<LANGUAGE_PHP>(
const std::string& test_file, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanFile<LANGUAGE_SOLIDITY>(
const std::string& test_file, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanFile<LANGUAGE_CPP>(
const std::string& test_file, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanExpression<LANGUAGE_C>(
Expand All @@ -38,6 +40,8 @@ template int TrainAndScanUtil::ScanExpression<LANGUAGE_VERILOG>(
const std::string& expression, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanExpression<LANGUAGE_PHP>(
const std::string& expression, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanExpression<LANGUAGE_SOLIDITY>(
const std::string& expression, std::ostream& log_file) const;
template int TrainAndScanUtil::ScanExpression<LANGUAGE_CPP>(
const std::string& expression, std::ostream& log_file) const;

Expand Down
9 changes: 9 additions & 0 deletions src/tree-sitter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ target_include_directories(tree-sitter-php
tree-sitter-php/src
)

# Static library for the Solidity grammar, built from the single parser.c
# source in the tree-sitter-solidity checkout.
add_library(tree-sitter-solidity STATIC
tree-sitter-solidity/src/parser.c
)

# The grammar's src directory is added as an include path for this target
# only (PRIVATE), so it is not propagated to targets that link against it.
target_include_directories(tree-sitter-solidity
PRIVATE
tree-sitter-solidity/src
)

add_library(tree-sitter-cpp STATIC
tree-sitter-cpp/src/parser.c
tree-sitter-cpp/src/scanner.cc
Expand Down
18 changes: 18 additions & 0 deletions src/tree_abstraction.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ inline std::string NodeToString<LEVEL_MIN, LANGUAGE_PHP>(
return NodeToString<LEVEL_MIN, LANGUAGE_C>(conditional_expression);
}

// Solidity at LEVEL_MIN has no logic of its own: the node is forwarded
// unchanged to the C specialization for the same level.
template <>
inline std::string NodeToString<LEVEL_MIN, LANGUAGE_SOLIDITY>(
    const TSNode& conditional_expression) {
  std::string rendered =
      NodeToString<LEVEL_MIN, LANGUAGE_C>(conditional_expression);
  return rendered;
}

// ---------------------------------------------------------------------------

inline std::string AbstractTerminalString(const TSNode& node) {
Expand Down Expand Up @@ -405,6 +411,12 @@ inline std::string NodeToString<LEVEL_TWO, LANGUAGE_PHP>(
const TSNode& conditional_expression) {
return NodeToString<LEVEL_MIN, LANGUAGE_PHP>(conditional_expression);
}

// Solidity at LEVEL_TWO forwards the node unchanged to the Solidity
// LEVEL_MIN specialization.
template <>
inline std::string NodeToString<LEVEL_TWO, LANGUAGE_SOLIDITY>(
    const TSNode& conditional_expression) {
  std::string rendered =
      NodeToString<LEVEL_MIN, LANGUAGE_SOLIDITY>(conditional_expression);
  return rendered;
}
// -----------------------------------------------------------------------
// Close to full-detailed level with using Tree-sitter print. Only
// difference is in printing operators for binary and unary ops.
Expand Down Expand Up @@ -456,6 +468,12 @@ inline std::string NodeToString<LEVEL_ONE, LANGUAGE_PHP>(
return NodeToString<LEVEL_ONE, LANGUAGE_C>(conditional_expression);
}

// Solidity at LEVEL_ONE has no logic of its own: the node is forwarded
// unchanged to the C specialization for the same level.
template <>
inline std::string NodeToString<LEVEL_ONE, LANGUAGE_SOLIDITY>(
    const TSNode& conditional_expression) {
  std::string rendered =
      NodeToString<LEVEL_ONE, LANGUAGE_C>(conditional_expression);
  return rendered;
}

template <>
inline std::string NodeToString<LEVEL_ONE, LANGUAGE_CPP>(
const TSNode& conditional_expression) {
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ foreach(file ${files})
tree-sitter
tree-sitter-c
tree-sitter-php
tree-sitter-solidity
tree-sitter-cpp
tree-sitter-verilog
pthread)
Expand Down