utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 08f101a9e8c6a72dfdb1c9b913df880e13a36333
parent 50381b951a2b156c1c236c77d34ac0fddbc0ea46
Author: Steven G. Johnson <stevenj@mit.edu>
Date:   Mon,  9 Mar 2015 22:40:51 -0400

Merge pull request #28 from tkelman/tk/cmake

WIP: Minimal cmake build script
Diffstat:
M .gitignore | 2 +-
M .travis.yml | 8 ++++++++
A CMakeLists.txt | 31 +++++++++++++++++++++++++++++++
M Makefile | 1 +
M NEWS.md | 2 +-
A appveyor.yml | 35 +++++++++++++++++++++++++++++++++++
A lump.md | 27 +++++++++++++++++++++++++++
D lump.txt | 26 --------------------------
M utf8proc.h | 2 +-
A utils.cmake | 20 ++++++++++++++++++++
10 files changed, 125 insertions(+), 29 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -8,8 +8,8 @@
 *.dll
 *.dylib
 *.dSYM
-*.txt
 *.out
+data/*.txt
 bench/bench
 bench/icu
 bench/unistring
diff --git a/.travis.yml b/.travis.yml
@@ -8,3 +8,11 @@ script:
     - make prefix=`pwd`/local install
     - make check
     - make utf8proc_data.c.new && (diff utf8proc_data.c.new utf8proc_data.c > /dev/null)
+    - mkdir build_static
+    - cd build_static
+    - cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON
+    - make
+    - mkdir ../build_shared
+    - cd ../build_shared
+    - cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON
+    - make
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,31 @@
+cmake_minimum_required (VERSION 2.8)
+
+include (utils.cmake)
+
+disallow_intree_builds()
+
+project (utf8proc C)
+
+# Be sure to also update these in Makefile!
+set(SO_MAJOR 1)
+set(SO_MINOR 2)
+set(SO_PATCH 0)
+
+add_definitions (
+  -DUTF8PROC_EXPORTS
+)
+
+if (NOT MSVC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -std=c99 -pedantic -Wall")
+endif ()
+
+add_library (utf8proc
+  utf8proc.c
+  utf8proc.h
+)
+
+set_target_properties (utf8proc PROPERTIES
+  POSITION_INDEPENDENT_CODE ON
+  VERSION "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}"
+  SOVERSION ${SO_MAJOR}
+)
diff --git a/Makefile b/Makefile
@@ -16,6 +16,7 @@ cc = $(CC) $(cflags)
 # from the utf8proc version number because it indicates ABI compatibility,
 # not API compatibility: MAJOR should be incremented whenever *binary*
 # compatibility is broken, even if the API is backward-compatible
+# Be sure to also update these in CMakeLists.txt!
 MAJOR=1
 MINOR=2
 PATCH=0
diff --git a/NEWS.md b/NEWS.md
@@ -105,7 +105,7 @@ Release of version 1.0.1
 
 2006-09-17:
 
-- added the `LUMP` option, which lumps certain characters together (see `lump.txt`) (also used for the PostgreSQL `unifold` function)
+- added the `LUMP` option, which lumps certain characters together (see `lump.md`) (also used for the PostgreSQL `unifold` function)
 - added the `STRIPMARK` option, which strips marking characters (or marks of composed characters)
 - deprecated ruby method `String#char_ary` in favour of `String#utf8chars`
 
diff --git a/appveyor.yml b/appveyor.yml
@@ -0,0 +1,35 @@
+branches:
+  only:
+    - master
+
+notifications:
+  - provider: Email
+    on_build_success: false
+    on_build_failure: false
+    on_build_status_changed: false
+
+build_script:
+  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
+      https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
+      Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
+      throw "There are newer queued builds for this pull request, failing early." }
+  - mkdir msvc_static
+  - cd msvc_static
+  - cmake ..
+  - cmake --build .
+  - mkdir ..\msvc_shared
+  - cd ..\msvc_shared
+  - cmake .. -DBUILD_SHARED_LIBS=ON
+  - cmake --build .
+  - C:\MinGW\msys\1.0\bin\sh --login -c "
+      echo 'C:\MinGW\ /MinGW' > /etc/fstab;
+      cd /c/projects/utf8proc;
+      mkdir mingw_static;
+      cd mingw_static;
+      cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -G'MSYS Makefiles';
+      make;
+      mkdir ../mingw_shared;
+      cd ../mingw_shared;
+      cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON -G'MSYS Makefiles';
+      make
+      "
diff --git a/lump.md b/lump.md
@@ -0,0 +1,27 @@
+```
+U+0020      <-- all space characters (general category Zs)
+U+0027  '   <-- left/right single quotation mark U+2018..2019,
+                modifier letter apostrophe U+02BC,
+                modifier letter vertical line U+02C8
+U+002D  -   <-- all dash characters (general category Pd),
+                minus U+2212
+U+002F  /   <-- fraction slash U+2044,
+                division slash U+2215
+U+003A  :   <-- ratio U+2236
+U+003C  <   <-- single left-pointing angle quotation mark U+2039,
+                left-pointing angle bracket U+2329,
+                left angle bracket U+3008
+U+003E  >   <-- single right-pointing angle quotation mark U+203A,
+                right-pointing angle bracket U+232A,
+                right angle bracket U+3009
+U+005C  \   <-- set minus U+2216
+U+005E  ^   <-- modifier letter up arrowhead U+02C4,
+                modifier letter circumflex accent U+02C6,
+                caret U+2038,
+                up arrowhead U+2303
+U+005F  _   <-- all connector characters (general category Pc),
+                modifier letter low macron U+02CD
+U+0060  `   <-- modifier letter grave accent U+02CB
+U+007C  |   <-- divides U+2223
+U+007E  ~   <-- tilde operator U+223C
+```
diff --git a/lump.txt b/lump.txt
@@ -1,26 +0,0 @@
-U+0020      <-- all space characters (general category Zs)
-U+0027  '   <-- left/right single quotation mark U+2018..2019,
-                modifier letter apostrophe U+02BC,
-                modifier letter vertical line U+02C8
-U+002D  -   <-- all dash characters (general category Pd),
-                minus U+2212
-U+002F  /   <-- fraction slash U+2044,
-                division slash U+2215
-U+003A  :   <-- ratio U+2236
-U+003C  <   <-- single left-pointing angle quotation mark U+2039,
-                left-pointing angle bracket U+2329,
-                left angle bracket U+3008
-U+003E  >   <-- single right-pointing angle quotation mark U+203A,
-                right-pointing angle bracket U+232A,
-                right angle bracket U+3009
-U+005C  \   <-- set minus U+2216
-U+005E  ^   <-- modifier letter up arrowhead U+02C4,
-                modifier letter circumflex accent U+02C6,
-                caret U+2038,
-                up arrowhead U+2303
-U+005F  _   <-- all connector characters (general category Pc),
-                modifier letter low macron U+02CD
-U+0060  `   <-- modifier letter grave accent U+02CB
-U+007C  |   <-- divides U+2223
-U+007E  ~   <-- tilde operator U+223C
-
diff --git a/utf8proc.h b/utf8proc.h
@@ -140,7 +140,7 @@ extern "C" {
  *             is representing a single grapheme cluster (see UAX#29).
  *  LUMP:      Lumps certain characters together
  *             (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-").
- *             (See lump.txt for details.)
+ *             (See lump.md for details.)
  *             If NLF2LF is set, this includes a transformation of
  *             paragraph and line separators to ASCII line-feed (LF).
  *  STRIPMARK: Strips all character markings
diff --git a/utils.cmake b/utils.cmake
@@ -0,0 +1,20 @@
+
+function (disallow_intree_builds)
+  # Adapted from LLVM's toplevel CMakeLists.txt file
+  if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE )
+    message(FATAL_ERROR "
+      In-source builds are not allowed. CMake would overwrite the
+      makefiles distributed with utf8proc. Please create a directory
+      and run cmake from there. Building in a subdirectory is
+      fine, e.g.:
+
+        mkdir build
+        cd build
+        cmake ..
+
+      This process created the file `CMakeCache.txt' and the
+      directory `CMakeFiles'. Please delete them.
+
+      ")
+  endif()
+endfunction()