Skip to content
Snippets Groups Projects
Commit 87738189 authored by Josef Schmeißer's avatar Josef Schmeißer
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Delimiter: pb
Language: TextProto
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Auto
TabWidth: 8
UseTab: Never
...
.vscode
perf.data*
build*
stages:
- test
test:
stage: test
script:
- mkdir -p build/debug
- cd build/debug
- cmake -DCMAKE_BUILD_TYPE=Debug ../..
- make -j8
- ./test_all
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/debug
policy: pull-push
tags:
- "fdedi"
clear_cache_test:
stage: test
script:
- rm -rf ./build/*
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/
policy: pull-push
when: manual
allow_failure: true
# Top-level build script: builds the `sum` library, the `test_all` unit-test
# binary and the `main` executable.

# CMAKE_CXX_STANDARD 17 (set below) is only understood by CMake 3.8 or newer.
cmake_minimum_required(VERSION 3.8)
project(tpchjoinoptimized C CXX)

# NOTE(review): CMake recommends choosing the compiler before project() (e.g.
# with -DCMAKE_CXX_COMPILER=... on the command line); kept as-is so the build
# environment is not changed behind anyone's back - please confirm.
set(CMAKE_C_COMPILER gcc)
set(CMAKE_CXX_COMPILER g++)
set(CMAKE_CXX_STANDARD 17)

# Compiler flags for the different targets
# AddressSanitizer and -march=native are enabled for every build type.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fPIC -Wall -Wextra -fno-omit-frame-pointer -march=native -Wno-unknown-pragmas")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fPIC -Wall -Wextra -fno-omit-frame-pointer -march=native")

# The test binary links against the thread library, so fail at configure time
# if it cannot be found.
find_package(Threads REQUIRED)
include(lib/gtest.cmake)

### Library for join query
add_library(sum src/sum.cpp)
target_include_directories(sum PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include>
    PRIVATE src)

### Tests
add_executable(test_all test/test_sum.cpp)
# Threads::Threads picks the right compile/link flags for the platform instead
# of hard-coding the library name `pthread`.
target_link_libraries(test_all sum gtest gtest_main Threads::Threads)

### main executable
add_executable(main src/main.cpp)
target_link_libraries(main sum)
# FDE Exercise 2.5
## Task
Implement the missing parts of the function sum_extendedprice in src/sum.cpp.
You can use the test provided in test/test_sum.cpp to check if your implementation works
correctly.
## Build
A configuration file is provided to build this project with CMake.
This allows you to build the project in the terminal, but it also
lets you use JetBrains CLion, Microsoft Visual Studio,
or other IDEs.
Building from Terminal:
Start in the project directory.
```
mkdir -p build/debug
cd build/debug
cmake -DCMAKE_BUILD_TYPE=Debug ../..
make
```
This creates the binaries test_all and main.
Make sure your builds are not failing! <br/>
*Left Sidebar > CI/CD > Pipelines*
#pragma once
#include <string>
/// Processes the file at path `file_name` and returns a 64-bit integer total.
/// NOTE(review): going by the name, presumably the sum of the extendedprice
/// column of a lineitem table - confirm against the definition.
int64_t sum_extendedprice(const std::string& file_name);
#include <string>
//---------------------------------------------------------------------------
inline std::string getDir(const std::string &file)
/// Returns the directory part of the path in 'file': everything before the
/// last '/' or '\\' (the separator itself is not included).
/// If 'file' contains no separator at all, "." is returned so that
/// getDir(f) + "/something" still names a path next to 'f'.
{
    const std::string::size_type found = file.find_last_of("/\\");
    if (found == std::string::npos) {
        // A bare file name lives in the current directory.
        return ".";
    }
    return file.substr(0, found);
}
//---------------------------------------------------------------------------
# Fetches and builds Google Benchmark as an external project and exposes it
# as the imported static library target `benchmark`.
include(ExternalProject)
find_package(Git REQUIRED)
# Get benchmark
# Cloned at tag v1.2.0 and installed below ${CMAKE_BINARY_DIR}/vendor/benchmark.
# The compilers, C++ flags and build type of this project are forwarded to it.
# UPDATE_COMMAND is empty, so no update step is run.
ExternalProject_Add(
benchmark_src
PREFIX "vendor/benchmark"
GIT_REPOSITORY "https://github.com/google/benchmark.git"
GIT_TAG v1.2.0
TIMEOUT 10
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/benchmark
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
UPDATE_COMMAND ""
)
# Prepare benchmark
ExternalProject_Get_Property(benchmark_src install_dir)
set(BENCHMARK_INCLUDE_DIR ${install_dir}/include)
set(BENCHMARK_LIBRARY_PATH ${install_dir}/lib/libbenchmark.a)
# The include directory is created at configure time, before the external
# project has been built (presumably because an imported target's include
# path has to exist when CMake generates the build system).
file(MAKE_DIRECTORY ${BENCHMARK_INCLUDE_DIR})
add_library(benchmark STATIC IMPORTED)
set_property(TARGET benchmark PROPERTY IMPORTED_LOCATION ${BENCHMARK_LIBRARY_PATH})
set_property(TARGET benchmark APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_INCLUDE_DIR})
# Dependencies
# Anything using `benchmark` waits for the external build to finish.
add_dependencies(benchmark benchmark_src)
# Fetches googletest as an external project, builds gtest and gmock from it,
# and exposes the imported static library targets `gtest`, `gtest_main` and
# `gmock`.
include(ExternalProject)
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
# Get googletest
# Download only: the configure, build, install and update commands are all
# empty. The two projects below build from the downloaded source tree.
ExternalProject_Add(
googletest
PREFIX "vendor/gtm"
GIT_REPOSITORY "https://github.com/google/googletest.git"
GIT_TAG release-1.8.0
TIMEOUT 10
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
# Build gtest
# Uses the googletest/ subdirectory of the downloaded sources; nothing is
# downloaded here (DOWNLOAD_COMMAND is empty).
ExternalProject_Add(
gtest_src
PREFIX "vendor/gtm"
SOURCE_DIR "vendor/gtm/src/googletest/googletest"
INSTALL_DIR "vendor/gtm/gtest"
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/gtm/gtest
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
DOWNLOAD_COMMAND ""
UPDATE_COMMAND ""
)
# Build gmock
# Same scheme as gtest, using the googlemock/ subdirectory.
ExternalProject_Add(
gmock_src
PREFIX "vendor/gtm"
SOURCE_DIR "vendor/gtm/src/googletest/googlemock"
INSTALL_DIR "vendor/gtm/gmock"
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/gtm/gmock
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
DOWNLOAD_COMMAND ""
UPDATE_COMMAND ""
)
# Prepare gtest
# `install_dir` is overwritten by each ExternalProject_Get_Property call, so
# the gtest paths are derived before the gmock ones further down.
ExternalProject_Get_Property(gtest_src install_dir)
set(GTEST_INCLUDE_DIR ${install_dir}/include )
set(GTEST_LIBRARY_PATH ${install_dir}/lib/libgtest.a)
# The include directory is created at configure time, before gtest has been
# built (presumably because an imported target's include path has to exist
# when CMake generates the build system).
file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIR})
add_library(gtest STATIC IMPORTED)
set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARY_PATH})
set_property(TARGET gtest APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIR})
# gtest_main comes from the same install tree and shares gtest's include dir.
add_library(gtest_main STATIC IMPORTED)
set(GTEST_MAIN_LIBRARY_PATH ${install_dir}/lib/libgtest_main.a)
set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARY_PATH})
set_property(TARGET gtest_main APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIR})
# Prepare gmock
ExternalProject_Get_Property(gmock_src install_dir)
set(GMOCK_INCLUDE_DIR ${install_dir}/include)
set(GMOCK_LIBRARY_PATH ${install_dir}/lib/libgmock.a)
file(MAKE_DIRECTORY ${GMOCK_INCLUDE_DIR})
add_library(gmock STATIC IMPORTED)
set_property(TARGET gmock PROPERTY IMPORTED_LOCATION ${GMOCK_LIBRARY_PATH})
set_property(TARGET gmock APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${GMOCK_INCLUDE_DIR})
# Dependencies
# Build order: download (googletest) -> build (gtest_src / gmock_src) ->
# imported targets that consumers link against.
add_dependencies(gtest_src googletest)
add_dependencies(gmock_src googletest)
add_dependencies(gtest gtest_src)
add_dependencies(gtest_main gtest_src)
add_dependencies(gmock gmock_src)
#include <iostream>
#include <string>
#include "sum.hpp"
/// Command-line entry point: prints the result of sum_extendedprice() for the
/// file named by the first argument.
///
/// Returns 1 (after printing a usage line to stderr) when no file argument is
/// given, 0 otherwise.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        // Diagnostics belong on stderr so that stdout only ever carries the
        // result; the trailing newline keeps the shell prompt on its own line.
        std::cerr << "Usage: " << argv[0] << " <lineitem.tbl>\n";
        return 1;
    }

    const int64_t price_sum = sum_extendedprice(argv[1]);
    std::cout << price_sum << '\n';
    return 0;
}
#include <iostream>
#include <string>
#include <string_view>
#include "thirdparty/parser.hpp"
using namespace aria::csv;
/// Converts a run of decimal digits to an integer.
///
/// Every character is treated as a digit without validation: characters
/// other than '0'..'9' are not rejected and simply contribute (c - '0').
/// An empty view yields 0.
int64_t ToInt(std::string_view s) {
    int64_t value = 0;
    for (std::string_view::size_type pos = 0; pos < s.size(); ++pos) {
        const int digit = s[pos] - '0';
        value = value * 10 + digit;
    }
    return value;
}
int64_t sum_extendedprice(const std::string& file_name) {
std::ifstream f(file_name);
CsvParser lineitem = CsvParser(f).delimiter('|');
int64_t price_sum = 0;
uint64_t field_number = 0;
for (auto row : lineitem) {
for (const auto& field : row) {
//-- TODO exercise 2.5
// your code goes here
//--
}
}
return price_sum;
}
// Tasks:
// * Compare this to awk execution time
// * Estimate how fast the program can possibly be on your local machine. Assume
// that the input file is read from memory
// * Find out where most of the time is spent and why this program is not even
// close to your estimates. Tools that can help here: perf stat, perf record,
// perf report
// *
This diff is collapsed.
#include <gtest/gtest.h>
#include <cstring>
#include <fstream>
#include "sum.hpp"
#include "util.hpp"
//---------------------------------------------------------------------------
TEST(SumTest, SumExtendedPrice)
/// Tests that sum_extendedprice() returns the expected total for the lineitem
/// table stored next to this test file under data/tpch/sf0_001.
{
    const auto lineitem_file = getDir(__FILE__) + "/data/tpch/sf0_001/lineitem.tbl";
    const int64_t result = sum_extendedprice(lineitem_file);
    // The expectation is spelled as a signed 64-bit value: `result` is an
    // int64_t, and an unsigned literal would make this a mixed-sign comparison.
    ASSERT_EQ(result, int64_t{15277439838});
}
//---------------------------------------------------------------------------
This diff is collapsed.
#ifndef ARIA_CSV_H
#define ARIA_CSV_H
#include <fstream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
namespace aria {
namespace csv {
// Row terminator. Any other value is a single terminator character stored in
// the enum's underlying char; CRLF is a sentinel that matches both '\r' and
// '\n'.
enum class Term : char { CRLF = -2 };

// Kind of result produced by the parser: a data field, the end of a row, or
// the end of the whole input.
enum class FieldType { DATA, ROW_END, CSV_END };

// A fully materialised CSV document: rows of string fields.
using CSV = std::vector<std::vector<std::string>>;

// Compares a character against a terminator.
// Checking for '\n', '\r', and '\r\n' by default
//
// Declared inline because this lives in a header: a plain definition would be
// emitted by every translation unit that includes it and break the link.
inline bool operator==(const char c, const Term t) {
    switch (t) {
        case Term::CRLF:
            return c == '\r' || c == '\n';
        default:
            return static_cast<char>(t) == c;
    }
}

// Negation of operator==(char, Term); inline for the same reason.
inline bool operator!=(const char c, const Term t) {
    return !(c == t);
}
// Wraps returned fields so we can also indicate
// that we hit row endings or the end of the csv itself
struct Field {
explicit Field(FieldType t): type(t), data(nullptr) {}
explicit Field(const std::string& str): type(FieldType::DATA), data(&str) {}
FieldType type;
const std::string *data;
};
// Reads and parses lines from a csv file.
//
// The parser pulls characters from an input stream through a fixed-size
// buffer and runs a small state machine over them. It can be driven field by
// field with next_field(), or row by row through begin()/end().
//
// The stream is held by reference and must outlive the parser.
class CsvParser {
private:
    // CSV state for state machine
    enum class State {
        START_OF_FIELD,
        IN_FIELD,
        IN_QUOTED_FIELD,
        IN_ESCAPED_QUOTE,
        END_OF_ROW,
        EMPTY
    };
    State m_state = State::START_OF_FIELD;

    // Configurable attributes
    char m_quote = '"';
    char m_delimiter = ',';
    Term m_terminator = Term::CRLF;
    std::istream& m_input;

    // Buffer capacities
    static constexpr int FIELDBUF_CAP = 1024;
    static constexpr int INPUTBUF_CAP = 1024 * 128;

    // Buffers
    std::string m_fieldbuf{};
    char m_inputbuf[INPUTBUF_CAP]{};

    // Misc
    bool m_eof = false;
    // Cursor == buffer size means "buffer consumed"; starting that way forces
    // the first top_token() call to fill the buffer.
    size_t m_cursor = INPUTBUF_CAP;
    size_t m_inputbuf_size = INPUTBUF_CAP;

public:
    // Creates the CSV parser which by default, splits on commas,
    // uses quotes to escape, and handles CSV files that end in either
    // '\r', '\n', or '\r\n'.
    // Throws std::runtime_error if the stream is not in a good state.
    explicit CsvParser(std::istream& input): m_input(input) {
        // Reserve space upfront to improve performance
        m_fieldbuf.reserve(FIELDBUF_CAP);
        if (!m_input.good()) {
            throw std::runtime_error("Something is wrong with input stream");
        }
    }

    // The setters return a reference to this parser so they can be chained.
    // Returning by value would copy the whole parser, input buffer included,
    // on every call, and that copy can throw inside a noexcept function.

    // Change the quote character
    CsvParser& quote(char c) noexcept {
        m_quote = c;
        return *this;
    }

    // Change the delimiter character
    CsvParser& delimiter(char c) noexcept {
        m_delimiter = c;
        return *this;
    }

    // Change the terminator character
    CsvParser& terminator(char c) noexcept {
        m_terminator = static_cast<Term>(c);
        return *this;
    }

    // The parser is in the empty state when there are
    // no more tokens left to read from the input buffer
    bool empty() const {
        return m_state == State::EMPTY;
    }

    // Reads a single field from the CSV.
    // Returns a DATA field pointing at the parser's internal field buffer
    // (overwritten by the next call), or a ROW_END / CSV_END marker.
    Field next_field() {
        if (empty()) {
            return Field(FieldType::CSV_END);
        }
        m_fieldbuf.clear();

        // This loop runs until either the parser has
        // read a full field or until there's no tokens left to read
        for (;;) {
            char *maybe_token = top_token();

            // If we're out of tokens to read return whatever's left in the
            // field and row buffers. If there's nothing left, return null.
            if (!maybe_token) {
                m_state = State::EMPTY;
                return !m_fieldbuf.empty() ? Field(m_fieldbuf) : Field(FieldType::CSV_END);
            }

            // Parsing the CSV is done using a finite state machine
            char c = *maybe_token;
            switch (m_state) {
                case State::START_OF_FIELD:
                    m_cursor++;
                    if (c == m_terminator) {
                        handle_crlf(c);
                        return Field(FieldType::ROW_END);
                    }

                    if (c == m_quote) {
                        m_state = State::IN_QUOTED_FIELD;
                    } else if (c == m_delimiter) {
                        // Delimiter right at the start: an empty field
                        return Field(m_fieldbuf);
                    } else {
                        m_state = State::IN_FIELD;
                        m_fieldbuf += c;
                    }
                    break;

                case State::IN_FIELD:
                    m_cursor++;
                    if (c == m_terminator) {
                        handle_crlf(c);
                        // The field is returned now; the row end is reported
                        // by the next call via END_OF_ROW.
                        m_state = State::END_OF_ROW;
                        return Field(m_fieldbuf);
                    }

                    if (c == m_delimiter) {
                        m_state = State::START_OF_FIELD;
                        return Field(m_fieldbuf);
                    } else {
                        m_fieldbuf += c;
                    }
                    break;

                case State::IN_QUOTED_FIELD:
                    m_cursor++;
                    if (c == m_quote) {
                        m_state = State::IN_ESCAPED_QUOTE;
                    } else {
                        m_fieldbuf += c;
                    }
                    break;

                case State::IN_ESCAPED_QUOTE:
                    m_cursor++;
                    if (c == m_terminator) {
                        handle_crlf(c);
                        m_state = State::END_OF_ROW;
                        return Field(m_fieldbuf);
                    }

                    if (c == m_quote) {
                        // Doubled quote inside a quoted field: literal quote
                        m_state = State::IN_QUOTED_FIELD;
                        m_fieldbuf += c;
                    } else if (c == m_delimiter) {
                        m_state = State::START_OF_FIELD;
                        return Field(m_fieldbuf);
                    } else {
                        m_state = State::IN_FIELD;
                        m_fieldbuf += c;
                    }
                    break;

                case State::END_OF_ROW:
                    // No character is consumed here; only the marker is emitted.
                    m_state = State::START_OF_FIELD;
                    return Field(FieldType::ROW_END);

                case State::EMPTY:
                    throw std::logic_error("You goofed");
            }
        }
    }

private:
    // When the parser hits the end of a line it needs
    // to check the special case of '\r\n' as a terminator.
    // If it finds that the previous token was a '\r', and
    // the next token will be a '\n', it skips the '\n'.
    void handle_crlf(const char c) {
        if (m_terminator != Term::CRLF || c != '\r') {
            return;
        }
        char *token = top_token();
        if (token && *token == '\n') {
            m_cursor++;
        }
    }

    // Pulls the next token from the input buffer, but does not move
    // the cursor forward. If the stream is empty and the input buffer
    // is also empty return a nullptr.
    char* top_token() {
        if (m_cursor == m_inputbuf_size) {
            // Return null if there's nothing left to read
            if (m_eof) {
                return nullptr;
            }

            // Refill the input buffer since it's been fully read
            m_cursor = 0;
            m_input.read(m_inputbuf, INPUTBUF_CAP);

            // A short read means the stream has nothing more to give, whether
            // it hit end of file or failed. Checking the byte count instead
            // of eof() alone keeps a failed read from leaving the old buffer
            // contents in place to be parsed again.
            const size_t received = static_cast<size_t>(m_input.gcount());
            if (received < static_cast<size_t>(INPUTBUF_CAP)) {
                m_eof = true;
                m_inputbuf_size = received;

                // Return null if there's nothing left to read
                if (m_inputbuf_size == 0) {
                    return nullptr;
                }
            }
        }
        return &m_inputbuf[m_cursor];
    }

public:
    // Iterator implementation for the CSV parser, which reads
    // from the CSV row by row in the form of a vector of strings
    class iterator {
    public:
        using difference_type = std::ptrdiff_t;
        using value_type = std::vector<std::string>;
        using pointer = const std::vector<std::string>*;
        using reference = const std::vector<std::string>&;
        using iterator_category = std::input_iterator_tag;

        // A non-end iterator reads the first row right away; an end iterator
        // keeps an empty row and the row index -1.
        explicit iterator(CsvParser *p, bool end = false): m_parser(p) {
            if (!end) {
                m_row.reserve(50);
                m_current_row = 0;
                next();
            }
        }

        iterator& operator++() {
            next();
            return *this;
        }

        iterator operator++(int) {
            iterator i = (*this);
            ++(*this);
            return i;
        }

        // Equal when both the row index and the number of fields agree. The
        // field count matters: a last row without a trailing terminator is
        // delivered with row index -1 but a non-empty row, and must not
        // compare equal to end().
        bool operator==(const iterator& other) const {
            return m_current_row == other.m_current_row
                && m_row.size() == other.m_row.size();
        }

        bool operator!=(const iterator& other) const {
            return !(*this == other);
        }

        reference operator*() const {
            return m_row;
        }

        pointer operator->() const {
            return &m_row;
        }

    private:
        value_type m_row{};
        CsvParser *m_parser;
        int m_current_row = -1;

        // Reads fields until the current row (or the input) ends. Existing
        // strings in m_row are reused so their capacity is kept.
        void next() {
            value_type::size_type num_fields = 0;
            for (;;) {
                auto field = m_parser->next_field();
                switch (field.type) {
                    case FieldType::CSV_END:
                        if (num_fields < m_row.size()) {
                            m_row.resize(num_fields);
                        }
                        m_current_row = -1;
                        return;
                    case FieldType::ROW_END:
                        if (num_fields < m_row.size()) {
                            m_row.resize(num_fields);
                        }
                        m_current_row++;
                        return;
                    case FieldType::DATA:
                        // field.data points to a const string, so this is a
                        // copy; the parser keeps its field buffer.
                        if (num_fields < m_row.size()) {
                            m_row[num_fields] = *field.data;
                        } else {
                            m_row.push_back(*field.data);
                        }
                        num_fields++;
                }
            }
        }
    };

    iterator begin() { return iterator(this); }
    iterator end() { return iterator(this, true); }
};
}
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment